diff options
author | Koen Kooi <koen@openembedded.org> | 2010-12-04 13:38:13 +0100 |
---|---|---|
committer | Koen Kooi <koen@openembedded.org> | 2010-12-04 13:38:13 +0100 |
commit | 65d9c71b426687634a83b7f6d5378ac01b85e581 (patch) | |
tree | 20e2ae4416b2240f4543e67fe6f82d3dda9f946f /recipes/xorg-lib | |
parent | 672f472b0842b3607835b3e9f7dbf0e3195dae57 (diff) | |
download | openembedded-65d9c71b426687634a83b7f6d5378ac01b85e581.tar.gz |
pixman 0.21.2: updates
* add 12 more NEON patches
* add 1 C fast path
* 1 autofoo fix
* one copyright fix
* rediff existing patches
Signed-off-by: Koen Kooi <koen@openembedded.org>
Diffstat (limited to 'recipes/xorg-lib')
24 files changed, 1588 insertions, 48 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch b/recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch new file mode 100644 index 0000000000..ebf6eafb0d --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch @@ -0,0 +1,35 @@ +From e7ee43c39d2370716a4d011afa8f5067eced9899 Mon Sep 17 00:00:00 2001 +From: Cyril Brulebois <kibi@debian.org> +Date: Wed, 17 Nov 2010 16:16:56 +0100 +Subject: [PATCH 02/24] Fix argument quoting for AC_INIT. + +One gets rid of this accordingly: +| autoreconf -vfi +| autoreconf: Entering directory `.' +| autoreconf: configure.ac: not using Gettext +| autoreconf: running: aclocal --force +| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" +| autoreconf: configure.ac: tracing +| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" + +Signed-off-by: Cyril Brulebois <kibi@debian.org> +--- + configure.ac | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/configure.ac b/configure.ac +index db1da21..147e1bf 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -58,7 +58,7 @@ m4_define([pixman_micro], 3) + + m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) + +-AC_INIT(pixman, pixman_version, "pixman@lists.freedesktop.org", pixman) ++AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman) + AM_INIT_AUTOMAKE([foreign dist-bzip2]) + + # Suppress verbose compile lines +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch b/recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch new file mode 100644 index 0000000000..e48a2b37dc --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch @@ -0,0 +1,39 @@ +From 654961efe405ad1a7e54a77548ca8af322ecc1f8 Mon Sep 17 00:00:00 2001 +From: Alan Coopersmith <alan.coopersmith@oracle.com> 
+Date: Sun, 21 Nov 2010 11:42:22 -0800 +Subject: [PATCH 03/24] Sun's copyrights belong to Oracle now + +Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> +--- + COPYING | 2 +- + pixman/solaris-hwcap.mapfile | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/COPYING b/COPYING +index 3092a34..15f9517 100644 +--- a/COPYING ++++ b/COPYING +@@ -18,7 +18,7 @@ possible. They may also add themselves to the list below. + * Copyright 2008 André Tupinambá + * Copyright 2008 Mozilla Corporation + * Copyright 2008 Frederic Plourde +- * Copyright 2009 Sun Microsystems, Inc. ++ * Copyright 2009, Oracle and/or its affiliates. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +diff --git a/pixman/solaris-hwcap.mapfile b/pixman/solaris-hwcap.mapfile +index 3605ca7..87efce1 100644 +--- a/pixman/solaris-hwcap.mapfile ++++ b/pixman/solaris-hwcap.mapfile +@@ -1,6 +1,6 @@ + ############################################################################### + # +-# Copyright 2009 Sun Microsystems, Inc. All rights reserved. ++# Copyright 2009, Oracle and/or its affiliates. All rights reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a + # copy of this software and associated documentation files (the "Software"), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch b/recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch new file mode 100644 index 0000000000..75eaac7bf2 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch @@ -0,0 +1,159 @@ +From 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 15 Nov 2010 18:26:43 +0200 +Subject: [PATCH 04/24] C fast path for a1 fill operation + +Can be used as one of the solutions to fix bug +https://bugs.freedesktop.org/show_bug.cgi?id=31604 +--- + pixman/pixman-fast-path.c | 87 ++++++++++++++++++++++++++++++++++++++++++++- + pixman/pixman.c | 7 +++- + 2 files changed, 91 insertions(+), 3 deletions(-) + +diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c +index 5d5fa95..37dfbae 100644 +--- a/pixman/pixman-fast-path.c ++++ b/pixman/pixman-fast-path.c +@@ -1334,7 +1334,11 @@ fast_composite_solid_fill (pixman_implementation_t *imp, + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + +- if (dst_image->bits.format == PIXMAN_a8) ++ if (dst_image->bits.format == PIXMAN_a1) ++ { ++ src = src >> 31; ++ } ++ else if (dst_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } +@@ -1655,6 +1659,7 @@ static const pixman_fast_path_t c_fast_paths[] = + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), ++ PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, 
solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), +@@ -1733,6 +1738,82 @@ static const pixman_fast_path_t c_fast_paths[] = + { PIXMAN_OP_NONE }, + }; + ++#ifdef WORDS_BIGENDIAN ++#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) ++#else ++#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) ++#endif ++ ++static force_inline void ++pixman_fill1_line (uint32_t *dst, int offs, int width, int v) ++{ ++ if (offs) ++ { ++ int leading_pixels = 32 - offs; ++ if (leading_pixels >= width) ++ { ++ if (v) ++ *dst |= A1_FILL_MASK (width, offs); ++ else ++ *dst &= ~A1_FILL_MASK (width, offs); ++ return; ++ } ++ else ++ { ++ if (v) ++ *dst++ |= A1_FILL_MASK (leading_pixels, offs); ++ else ++ *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); ++ width -= leading_pixels; ++ } ++ } ++ while (width >= 32) ++ { ++ if (v) ++ *dst++ = 0xFFFFFFFF; ++ else ++ *dst++ = 0; ++ width -= 32; ++ } ++ if (width > 0) ++ { ++ if (v) ++ *dst |= A1_FILL_MASK (width, 0); ++ else ++ *dst &= ~A1_FILL_MASK (width, 0); ++ } ++} ++ ++static void ++pixman_fill1 (uint32_t *bits, ++ int stride, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t xor) ++{ ++ uint32_t *dst = bits + y * stride + (x >> 5); ++ int offs = x & 31; ++ ++ if (xor & 1) ++ { ++ while (height--) ++ { ++ pixman_fill1_line (dst, offs, width, 1); ++ dst += stride; ++ } ++ } ++ else ++ { ++ while (height--) ++ { ++ pixman_fill1_line (dst, offs, width, 0); ++ dst += stride; ++ } ++ } ++} ++ + static void + pixman_fill8 (uint32_t *bits, + int stride, +@@ -1819,6 +1900,10 @@ fast_path_fill (pixman_implementation_t *imp, + { + switch (bpp) + { ++ case 1: ++ pixman_fill1 (bits, stride, x, y, width, height, xor); ++ break; ++ + case 8: + pixman_fill8 (bits, stride, x, y, width, height, xor); + break; +diff --git a/pixman/pixman.c b/pixman/pixman.c +index 045c556..ec565f9 100644 +--- a/pixman/pixman.c ++++ b/pixman/pixman.c +@@ 
-875,7 +875,8 @@ color_to_pixel (pixman_color_t * color, + format == PIXMAN_b8g8r8x8 || + format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5 || +- format == PIXMAN_a8)) ++ format == PIXMAN_a8 || ++ format == PIXMAN_a1)) + { + return FALSE; + } +@@ -895,7 +896,9 @@ color_to_pixel (pixman_color_t * color, + ((c & 0x000000ff) << 24); + } + +- if (format == PIXMAN_a8) ++ if (format == PIXMAN_a1) ++ c = c >> 31; ++ else if (format == PIXMAN_a8) + c = c >> 24; + else if (format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5) +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch new file mode 100644 index 0000000000..a7a9b11a87 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch @@ -0,0 +1,113 @@ +From 98d08b37f17a3379d0ceff8bb7de8f943873fbd8 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri, 26 Nov 2010 08:55:49 +0200 +Subject: [PATCH 05/24] ARM: added 'neon_composite_over_n_8_8' fast path + +--- + pixman/pixman-arm-neon-asm.S | 68 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 3 ++ + 2 files changed, 71 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 91ec27d..a3875ee 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1203,6 +1203,74 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_over_n_8_8_process_pixblock_head ++ vmull.u8 q0, d24, d8 ++ vmull.u8 q1, d25, d8 ++ vmull.u8 q6, d26, d8 ++ vmull.u8 q7, d27, d8 ++ vrshr.u16 q10, q0, #8 ++ vrshr.u16 q11, q1, #8 ++ vrshr.u16 q12, q6, #8 ++ vrshr.u16 q13, q7, #8 ++ vraddhn.u16 d0, q0, q10 ++ vraddhn.u16 d1, q1, q11 ++ vraddhn.u16 d2, q6, q12 ++ vraddhn.u16 d3, q7, 
q13 ++ vmvn.8 q12, q0 ++ vmvn.8 q13, q1 ++ vmull.u8 q8, d24, d4 ++ vmull.u8 q9, d25, d5 ++ vmull.u8 q10, d26, d6 ++ vmull.u8 q11, d27, d7 ++.endm ++ ++.macro pixman_composite_over_n_8_8_process_pixblock_tail ++ vrshr.u16 q14, q8, #8 ++ vrshr.u16 q15, q9, #8 ++ vrshr.u16 q12, q10, #8 ++ vrshr.u16 q13, q11, #8 ++ vraddhn.u16 d28, q14, q8 ++ vraddhn.u16 d29, q15, q9 ++ vraddhn.u16 d30, q12, q10 ++ vraddhn.u16 d31, q13, q11 ++ vqadd.u8 q14, q0, q14 ++ vqadd.u8 q15, q1, q15 ++.endm ++ ++/* TODO: expand macros and do better instructions scheduling */ ++.macro pixman_composite_over_n_8_8_process_pixblock_tail_head ++ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! ++ pixman_composite_over_n_8_8_process_pixblock_tail ++ vld1.8 {d24, d25, d26, d27}, [MASK]! ++ cache_preload 32, 32 ++ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! ++ pixman_composite_over_n_8_8_process_pixblock_head ++.endm ++ ++.macro pixman_composite_over_n_8_8_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vpush {d8-d15} ++ vld1.32 {d8[0]}, [DUMMY] ++ vdup.8 d8, d8[3] ++.endm ++ ++.macro pixman_composite_over_n_8_8_cleanup ++ vpop {d8-d15} ++.endm ++ ++generate_composite_function \ ++ pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ ++ FLAG_DST_READWRITE, \ ++ 32, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_over_n_8_8_init, \ ++ pixman_composite_over_n_8_8_cleanup, \ ++ pixman_composite_over_n_8_8_process_pixblock_head, \ ++ pixman_composite_over_n_8_8_process_pixblock_tail, \ ++ pixman_composite_over_n_8_8_process_pixblock_tail_head ++ ++/******************************************************************************/ ++ + .macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 2f82069..72ef75e 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -76,6 +76,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, 
over_n_8_8888, + uint8_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, ++ uint8_t, 1, uint8_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) + +@@ -235,6 +237,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch b/recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch new file mode 100644 index 0000000000..71a41a7a59 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch @@ -0,0 +1,157 @@ +From 3be86a92ccab240859062a541cdb871d81c9501a Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun, 28 Nov 2010 21:45:06 +0200 +Subject: [PATCH 06/24] ARM: introduced 'fetch_mask_pixblock' macro to simplify code + +This macro hides the implementation details of pixels fetching +for the mask image just like 'fetch_src_pixblock' does for the +source image. This provides more possibilities for reusing the +same code blocks in different compositing functions. + +This patch does not introduce any functional changes and the +resulting code in the compiled object file is exactly the same. 
+--- + pixman/pixman-arm-neon-asm.S | 26 +++++++++++++------------- + pixman/pixman-arm-neon-asm.h | 5 +++++ + 2 files changed, 18 insertions(+), 13 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index a3875ee..155a236 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -841,7 +841,7 @@ generate_composite_function \ + pixman_composite_over_n_8_0565_process_pixblock_tail + vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! +- vld1.8 {d24}, [MASK]! ++ fetch_mask_pixblock + cache_preload 8, 8 + pixman_composite_over_n_8_0565_process_pixblock_head + .endm +@@ -889,7 +889,7 @@ generate_composite_function \ + pixman_composite_over_n_8_0565_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 +- vld1.8 {d24}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_over_n_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! + .endm +@@ -1171,7 +1171,7 @@ generate_composite_function \ + pixman_composite_over_n_8_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! +- vld1.8 {d24}, [MASK]! ++ fetch_mask_pixblock + cache_preload 8, 8 + pixman_composite_over_n_8_8888_process_pixblock_head + .endm +@@ -1241,7 +1241,7 @@ generate_composite_function \ + .macro pixman_composite_over_n_8_8_process_pixblock_tail_head + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_n_8_8_process_pixblock_tail +- vld1.8 {d24, d25, d26, d27}, [MASK]! ++ fetch_mask_pixblock + cache_preload 32, 32 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_n_8_8_process_pixblock_head +@@ -1341,7 +1341,7 @@ generate_composite_function \ + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 +- vld4.8 {d24, d25, d26, d27}, [MASK]! 
++ fetch_mask_pixblock + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + cache_preload 8, 8 +@@ -1405,7 +1405,7 @@ generate_composite_function \ + pixman_composite_add_n_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! +- vld1.8 {d24, d25, d26, d27}, [MASK]! ++ fetch_mask_pixblock + cache_preload 32, 32 + pixman_composite_add_n_8_8_process_pixblock_head + .endm +@@ -1462,7 +1462,7 @@ generate_composite_function \ + pixman_composite_add_8_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! +- vld1.8 {d24, d25, d26, d27}, [MASK]! ++ fetch_mask_pixblock + fetch_src_pixblock + cache_preload 32, 32 + pixman_composite_add_8_8_8_process_pixblock_head +@@ -1515,7 +1515,7 @@ generate_composite_function \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! +- vld4.8 {d24, d25, d26, d27}, [MASK]! ++ fetch_mask_pixblock + fetch_src_pixblock + cache_preload 8, 8 + pixman_composite_add_8888_8888_8888_process_pixblock_head +@@ -1587,7 +1587,7 @@ generate_composite_function_single_scanline \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 +- vld4.8 {d12, d13, d14, d15}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + .endm +@@ -1658,7 +1658,7 @@ generate_composite_function \ + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 +- vld4.8 {d12, d13, d14, d15}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ .endm +@@ -1700,7 +1700,7 @@ generate_composite_function_single_scanline \ + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 +- vld1.8 {d15}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + .endm +@@ -1917,7 +1917,7 @@ generate_composite_function \ + + /* TODO: expand macros and do better instructions scheduling */ + .macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head +- vld1.8 {d15}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_over_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! +@@ -1969,7 +1969,7 @@ generate_composite_function \ + + /* TODO: expand macros and do better instructions scheduling */ + .macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head +- vld1.8 {d15}, [MASK]! ++ fetch_mask_pixblock + pixman_composite_add_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! +diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h +index c75bdc3..24fa361 100644 +--- a/pixman/pixman-arm-neon-asm.h ++++ b/pixman/pixman-arm-neon-asm.h +@@ -431,6 +431,11 @@ + .endif + .endm + ++.macro fetch_mask_pixblock ++ pixld pixblock_size, mask_bpp, \ ++ (mask_basereg - pixblock_size * mask_bpp / 64), MASK ++.endm ++ + /* + * Macro which is used to process leading pixels until destination + * pointer is properly aligned (at 16 bytes boundary). 
When destination +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch b/recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch new file mode 100644 index 0000000000..acdfdf873d --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch @@ -0,0 +1,170 @@ +From e6814837a6ccd3e4db329e0131eaf2055d2c864b Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri, 26 Nov 2010 17:06:58 +0200 +Subject: [PATCH 07/24] ARM: better NEON instructions scheduling for over_n_8_0565 + +Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9. +Now it is ~30% faster for the pixel data in L1 cache and makes better use +of memory bandwidth when running at lower clock frequencies (ex. 500MHz). +Also register d24 (pixels from the mask image) is now not clobbered by +supplementary macros, which allows to reuse them for the other variants +of compositing operations later. 
+ +Benchmark from ARM Cortex-A8 @500MHz: + +== before == + + over_n_8_0565 = L1: 63.90 L2: 63.15 M: 60.97 ( 73.53%) + HT: 28.89 VT: 24.14 R: 21.33 RT: 6.78 ( 67Kops/s) + +== after == + + over_n_8_0565 = L1: 82.64 L2: 75.19 M: 71.52 ( 84.14%) + HT: 30.49 VT: 25.56 R: 22.36 RT: 6.89 ( 68Kops/s) +--- + pixman/pixman-arm-neon-asm.S | 120 +++++++++++++++++++++++++++--------------- + 1 files changed, 77 insertions(+), 43 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 155a236..ffffc1c 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -792,58 +792,92 @@ generate_composite_function \ + /******************************************************************************/ + + .macro pixman_composite_over_n_8_0565_process_pixblock_head +- /* in */ +- vmull.u8 q0, d24, d8 +- vmull.u8 q1, d24, d9 +- vmull.u8 q6, d24, d10 +- vmull.u8 q7, d24, d11 +- vrshr.u16 q10, q0, #8 +- vrshr.u16 q11, q1, #8 +- vrshr.u16 q12, q6, #8 +- vrshr.u16 q13, q7, #8 +- vraddhn.u16 d0, q0, q10 +- vraddhn.u16 d1, q1, q11 +- vraddhn.u16 d2, q6, q12 +- vraddhn.u16 d3, q7, q13 +- +- vshrn.u16 d6, q2, #8 +- vshrn.u16 d7, q2, #3 +- vsli.u16 q2, q2, #5 +- vsri.u8 d6, d6, #5 +- vmvn.8 d3, d3 +- vsri.u8 d7, d7, #6 +- vshrn.u16 d30, q2, #2 +- /* now do alpha blending */ +- vmull.u8 q10, d3, d6 +- vmull.u8 q11, d3, d7 +- vmull.u8 q12, d3, d30 +- vrshr.u16 q13, q10, #8 +- vrshr.u16 q3, q11, #8 +- vrshr.u16 q15, q12, #8 +- vraddhn.u16 d20, q10, q13 +- vraddhn.u16 d23, q11, q3 +- vraddhn.u16 d22, q12, q15 ++ vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ ++ vmull.u8 q1, d24, d9 ++ vmull.u8 q6, d24, d10 ++ vmull.u8 q7, d24, d11 ++ vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ ++ vshrn.u16 d7, q2, #3 ++ vsli.u16 q2, q2, #5 ++ vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ ++ vrshr.u16 q9, q1, #8 ++ vrshr.u16 q10, q6, #8 ++ vrshr.u16 q11, q7, #8 ++ vraddhn.u16 d0, q0, q8 ++ vraddhn.u16 d1, q1, q9 ++ vraddhn.u16 
d2, q6, q10 ++ vraddhn.u16 d3, q7, q11 ++ vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ ++ vsri.u8 d7, d7, #6 ++ vmvn.8 d3, d3 ++ vshrn.u16 d30, q2, #2 ++ vmull.u8 q8, d3, d6 /* now do alpha blending */ ++ vmull.u8 q9, d3, d7 ++ vmull.u8 q10, d3, d30 + .endm + + .macro pixman_composite_over_n_8_0565_process_pixblock_tail +- vqadd.u8 d16, d2, d20 +- vqadd.u8 q9, q0, q11 +- /* convert to r5g6b5 */ +- vshll.u8 q14, d16, #8 +- vshll.u8 q8, d19, #8 +- vshll.u8 q9, d18, #8 +- vsri.u16 q14, q8, #5 +- vsri.u16 q14, q9, #11 ++ /* 3 cycle bubble (after vmull.u8) */ ++ vrshr.u16 q13, q8, #8 ++ vrshr.u16 q11, q9, #8 ++ vrshr.u16 q15, q10, #8 ++ vraddhn.u16 d16, q8, q13 ++ vraddhn.u16 d27, q9, q11 ++ vraddhn.u16 d26, q10, q15 ++ vqadd.u8 d16, d2, d16 ++ /* 1 cycle bubble */ ++ vqadd.u8 q9, q0, q13 ++ vshll.u8 q14, d16, #8 /* convert to 16bpp */ ++ vshll.u8 q8, d19, #8 ++ vshll.u8 q9, d18, #8 ++ vsri.u16 q14, q8, #5 ++ /* 1 cycle bubble */ ++ vsri.u16 q14, q9, #11 + .endm + +-/* TODO: expand macros and do better instructions scheduling */ + .macro pixman_composite_over_n_8_0565_process_pixblock_tail_head +- pixman_composite_over_n_8_0565_process_pixblock_tail +- vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! 
++ vshrn.u16 d6, q2, #8 + fetch_mask_pixblock ++ vshrn.u16 d7, q2, #3 ++ fetch_src_pixblock ++ vmull.u8 q6, d24, d10 ++ vrshr.u16 q13, q8, #8 ++ vrshr.u16 q11, q9, #8 ++ vrshr.u16 q15, q10, #8 ++ vraddhn.u16 d16, q8, q13 ++ vraddhn.u16 d27, q9, q11 ++ vraddhn.u16 d26, q10, q15 ++ vqadd.u8 d16, d2, d16 ++ vmull.u8 q1, d24, d9 ++ vqadd.u8 q9, q0, q13 ++ vshll.u8 q14, d16, #8 ++ vmull.u8 q0, d24, d8 ++ vshll.u8 q8, d19, #8 ++ vshll.u8 q9, d18, #8 ++ vsri.u16 q14, q8, #5 ++ vmull.u8 q7, d24, d11 ++ vsri.u16 q14, q9, #11 ++ + cache_preload 8, 8 +- pixman_composite_over_n_8_0565_process_pixblock_head ++ ++ vsli.u16 q2, q2, #5 ++ vrshr.u16 q8, q0, #8 ++ vrshr.u16 q9, q1, #8 ++ vrshr.u16 q10, q6, #8 ++ vrshr.u16 q11, q7, #8 ++ vraddhn.u16 d0, q0, q8 ++ vraddhn.u16 d1, q1, q9 ++ vraddhn.u16 d2, q6, q10 ++ vraddhn.u16 d3, q7, q11 ++ vsri.u8 d6, d6, #5 ++ vsri.u8 d7, d7, #6 ++ vmvn.8 d3, d3 ++ vshrn.u16 d30, q2, #2 ++ vst1.16 {d28, d29}, [DST_W, :128]! ++ vmull.u8 q8, d3, d6 ++ vmull.u8 q9, d3, d7 ++ vmull.u8 q10, d3, d30 + .endm + + /* +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch new file mode 100644 index 0000000000..4c5bf8d916 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch @@ -0,0 +1,74 @@ +From a7c36681c0c1955ff9110b81f1789e56abb10a95 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat, 27 Nov 2010 03:53:12 +0200 +Subject: [PATCH 08/24] ARM: added 'neon_composite_over_8888_n_0565' fast path + +--- + pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 32 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index ffffc1c..3e52a49 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ 
b/pixman/pixman-arm-neon-asm.S +@@ -917,6 +917,34 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_over_8888_n_0565_init ++ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) ++ vpush {d8-d15} ++ vld1.32 {d24[0]}, [DUMMY] ++ vdup.8 d24, d24[3] ++.endm ++ ++.macro pixman_composite_over_8888_n_0565_cleanup ++ vpop {d8-d15} ++.endm ++ ++generate_composite_function \ ++ pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_over_8888_n_0565_init, \ ++ pixman_composite_over_8888_n_0565_cleanup, \ ++ pixman_composite_over_n_8_0565_process_pixblock_head, \ ++ pixman_composite_over_n_8_0565_process_pixblock_tail, \ ++ pixman_composite_over_n_8_0565_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 8, /* src_basereg */ \ ++ 24 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + /* TODO: expand macros and do better instructions scheduling */ + .macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! 
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 72ef75e..8156bbb 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -83,6 +83,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, + + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, ++ uint32_t, 1, uint16_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +@@ -253,6 +255,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch b/recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch new file mode 100644 index 0000000000..b45671e98e --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch @@ -0,0 +1,139 @@ +From 3990931bf6197eff1cec06cf24bce53ddf9a539a Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat, 27 Nov 2010 04:47:39 +0200 +Subject: [PATCH 09/24] ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565 + +Renamed suppementary 
macros from 'over_n_8_0565' to 'over_8888_8_0565', +because they can actually support all variants of this operation: +over_8888_8_0565/over_n_8_0565/over_8888_n_0565. + +Also 'over_8888_8_0565' now uses more optimized common code instead of its +own variant, improving performance a bit. Even though this operation is +still memory bandwidth limited, scaled variants of these fast paths may +put more stress on CPU later. + +Benchmarked on ARM Cortex-A8 @500MHz: + +== before == + + over_8888_8_0565 = L1: 67.10 L2: 53.82 M: 44.70 (105.17%) + HT: 18.73 VT: 16.91 R: 14.25 RT: 4.80 (52Kops/s) + +== after == + + over_8888_8_0565 = L1: 77.83 L2: 58.14 M: 44.82 (105.52%) + HT: 20.58 VT: 17.44 R: 15.05 RT: 4.88 (52Kops/s) +--- + pixman/pixman-arm-neon-asm.S | 61 +++++++++++++++++------------------------ + 1 files changed, 25 insertions(+), 36 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 3e52a49..4175144 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -791,7 +791,7 @@ generate_composite_function \ + + /******************************************************************************/ + +-.macro pixman_composite_over_n_8_0565_process_pixblock_head ++.macro pixman_composite_over_8888_8_0565_process_pixblock_head + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 +@@ -816,7 +816,7 @@ generate_composite_function \ + vmull.u8 q10, d3, d30 + .endm + +-.macro pixman_composite_over_n_8_0565_process_pixblock_tail ++.macro pixman_composite_over_8888_8_0565_process_pixblock_tail + /* 3 cycle bubble (after vmull.u8) */ + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 +@@ -835,7 +835,7 @@ generate_composite_function \ + vsri.u16 q14, q9, #11 + .endm + +-.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head ++.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! 
+ vshrn.u16 d6, q2, #8 + fetch_mask_pixblock +@@ -880,6 +880,23 @@ generate_composite_function \ + vmull.u8 q10, d3, d30 + .endm + ++generate_composite_function \ ++ pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ default_init_need_all_regs, \ ++ default_cleanup_need_all_regs, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_head, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_tail, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 8, /* src_basereg */ \ ++ 24 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + /* + * This function needs a special initialization of solid mask. + * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET +@@ -911,9 +928,9 @@ generate_composite_function \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_0565_init, \ + pixman_composite_over_n_8_0565_cleanup, \ +- pixman_composite_over_n_8_0565_process_pixblock_head, \ +- pixman_composite_over_n_8_0565_process_pixblock_tail, \ +- pixman_composite_over_n_8_0565_process_pixblock_tail_head ++ pixman_composite_over_8888_8_0565_process_pixblock_head, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_tail, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_tail_head + + /******************************************************************************/ + +@@ -935,36 +952,8 @@ generate_composite_function \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_0565_init, \ + pixman_composite_over_8888_n_0565_cleanup, \ +- pixman_composite_over_n_8_0565_process_pixblock_head, \ +- pixman_composite_over_n_8_0565_process_pixblock_tail, \ +- pixman_composite_over_n_8_0565_process_pixblock_tail_head, \ +- 28, /* dst_w_basereg */ \ +- 4, /* dst_r_basereg */ \ +- 
8, /* src_basereg */ \ +- 24 /* mask_basereg */ +- +-/******************************************************************************/ +- +-/* TODO: expand macros and do better instructions scheduling */ +-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head +- vld1.16 {d4, d5}, [DST_R, :128]! +- pixman_composite_over_n_8_0565_process_pixblock_tail +- fetch_src_pixblock +- cache_preload 8, 8 +- fetch_mask_pixblock +- pixman_composite_over_n_8_0565_process_pixblock_head +- vst1.16 {d28, d29}, [DST_W, :128]! +-.endm +- +-generate_composite_function \ +- pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ +- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ +- 8, /* number of pixels, processed in a single block */ \ +- 5, /* prefetch distance */ \ +- default_init_need_all_regs, \ +- default_cleanup_need_all_regs, \ +- pixman_composite_over_n_8_0565_process_pixblock_head, \ +- pixman_composite_over_n_8_0565_process_pixblock_tail, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_head, \ ++ pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch new file mode 100644 index 0000000000..376631a50b --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch @@ -0,0 +1,74 @@ +From 6d2f7f981b52b41f4321071c325babcf792bd666 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat, 27 Nov 2010 15:53:54 +0200 +Subject: [PATCH 10/24] ARM: added 'neon_composite_over_0565_n_0565' fast path + +--- + pixman/pixman-arm-neon-asm.S | 28 ++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 32 insertions(+), 0 
deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 4175144..81c0a34 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1994,6 +1994,34 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_over_0565_n_0565_init ++ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) ++ vpush {d8-d15} ++ vld1.32 {d15[0]}, [DUMMY] ++ vdup.8 d15, d15[3] ++.endm ++ ++.macro pixman_composite_over_0565_n_0565_cleanup ++ vpop {d8-d15} ++.endm ++ ++generate_composite_function \ ++ pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ ++ FLAG_DST_READWRITE, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_over_0565_n_0565_init, \ ++ pixman_composite_over_0565_n_0565_cleanup, \ ++ pixman_composite_over_0565_8_0565_process_pixblock_head, \ ++ pixman_composite_over_0565_8_0565_process_pixblock_tail, \ ++ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 10, /* dst_r_basereg */ \ ++ 8, /* src_basereg */ \ ++ 15 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_add_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 8156bbb..b01c3e0 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -85,6 +85,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, ++ uint16_t, 1, uint16_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +@@ -257,6 +259,8 @@ static const 
pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), ++ PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch new file mode 100644 index 0000000000..19f429bbf7 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch @@ -0,0 +1,63 @@ +From c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun, 28 Nov 2010 22:05:53 +0200 +Subject: [PATCH 11/24] ARM: added 'neon_composite_add_8888_8_8888' fast path + +--- + pixman/pixman-arm-neon-asm.S | 17 +++++++++++++++++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 21 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 81c0a34..11ef166 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1595,6 +1595,23 @@ generate_composite_function_single_scanline \ + + /******************************************************************************/ + ++generate_composite_function \ ++ pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ ++ FLAG_DST_READWRITE | 
FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_head, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 27 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index b01c3e0..eaf9787 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, ++ uint32_t, 1, uint8_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, +@@ -282,6 +284,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, 
a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch b/recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch new file mode 100644 index 0000000000..28dd8b6051 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch @@ -0,0 +1,105 @@ +From 1fba7790367d7b726d05a33bbbcebe10b9280a31 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 29 Nov 2010 02:10:22 +0200 +Subject: [PATCH 12/24] ARM: better NEON instructions scheduling for add_8888_8888_8888 + +Provides a minor performance improvement by using pipelining and hiding +instructions latencies. Also do not clobber d0-d3 registers (source +image pixels) while doing calculations in order to allow the use of +the same macro for add_n_8_8888 fast path later. 
+ +Benchmark from ARM Cortex-A8 @500MHz: + +== before == + + add_8888_8888_8888 = L1: 95.94 L2: 42.27 M: 25.60 (121.09%) + HT: 14.54 VT: 13.13 R: 12.77 RT: 4.49 (48Kops/s) + add_8888_8_8888 = L1: 104.51 L2: 57.81 M: 36.06 (106.62%) + HT: 19.24 VT: 16.45 R: 14.71 RT: 4.80 (51Kops/s) + +== after == + + add_8888_8888_8888 = L1: 106.66 L2: 47.82 M: 27.32 (129.30%) + HT: 15.44 VT: 13.96 R: 12.86 RT: 4.48 (48Kops/s) + add_8888_8_8888 = L1: 107.72 L2: 61.02 M: 38.26 (113.16%) + HT: 19.48 VT: 16.72 R: 14.82 RT: 4.80 (51Kops/s) +--- + pixman/pixman-arm-neon-asm.S | 52 +++++++++++++++++++++++++++-------------- + 1 files changed, 34 insertions(+), 18 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 11ef166..829ef84 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1542,34 +1542,50 @@ generate_composite_function \ + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ +- vmull.u8 q8, d27, d0 +- vmull.u8 q9, d27, d1 ++ vmull.u8 q8, d27, d0 ++ vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 +- vrshr.u16 q0, q8, #8 +- vrshr.u16 q1, q9, #8 +- vrshr.u16 q12, q10, #8 +- vrshr.u16 q13, q11, #8 +- vraddhn.u16 d0, q0, q8 +- vraddhn.u16 d1, q1, q9 +- vraddhn.u16 d2, q12, q10 +- vraddhn.u16 d3, q13, q11 +- vqadd.u8 q14, q0, q2 +- vqadd.u8 q15, q1, q3 ++ /* 1 cycle bubble */ ++ vrsra.u16 q8, q8, #8 ++ vrsra.u16 q9, q9, #8 ++ vrsra.u16 q10, q10, #8 ++ vrsra.u16 q11, q11, #8 + .endm + + .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail ++ /* 2 cycle bubble */ ++ vrshrn.u16 d28, q8, #8 ++ vrshrn.u16 d29, q9, #8 ++ vrshrn.u16 d30, q10, #8 ++ vrshrn.u16 d31, q11, #8 ++ vqadd.u8 q14, q2, q14 ++ /* 1 cycle bubble */ ++ vqadd.u8 q15, q3, q15 + .endm + +-/* TODO: expand macros and do better instructions scheduling */ + .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head +- 
pixman_composite_add_8888_8888_8888_process_pixblock_tail +- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! +- fetch_mask_pixblock + fetch_src_pixblock ++ vrshrn.u16 d28, q8, #8 ++ fetch_mask_pixblock ++ vrshrn.u16 d29, q9, #8 ++ vmull.u8 q8, d27, d0 ++ vrshrn.u16 d30, q10, #8 ++ vmull.u8 q9, d27, d1 ++ vrshrn.u16 d31, q11, #8 ++ vmull.u8 q10, d27, d2 ++ vqadd.u8 q14, q2, q14 ++ vmull.u8 q11, d27, d3 ++ vqadd.u8 q15, q3, q15 ++ vrsra.u16 q8, q8, #8 ++ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! ++ vrsra.u16 q9, q9, #8 ++ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! ++ vrsra.u16 q10, q10, #8 ++ + cache_preload 8, 8 +- pixman_composite_add_8888_8888_8888_process_pixblock_head ++ ++ vrsra.u16 q11, q11, #8 + .endm + + generate_composite_function \ +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch new file mode 100644 index 0000000000..a1da09f9bd --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch @@ -0,0 +1,75 @@ +From b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 29 Nov 2010 02:38:52 +0200 +Subject: [PATCH 13/24] ARM: added 'neon_composite_add_n_8_8888' fast path + +--- + pixman/pixman-arm-neon-asm.S | 29 +++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 33 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 829ef84..dd6f2c5 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1628,6 +1628,35 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_add_n_8_8888_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vld1.32 {d3[0]}, 
[DUMMY] ++ vdup.8 d0, d3[0] ++ vdup.8 d1, d3[1] ++ vdup.8 d2, d3[2] ++ vdup.8 d3, d3[3] ++.endm ++ ++.macro pixman_composite_add_n_8_8888_cleanup ++.endm ++ ++generate_composite_function \ ++ pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_add_n_8_8888_init, \ ++ pixman_composite_add_n_8_8888_cleanup, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_head, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 27 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index eaf9787..5ad58bd 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -80,6 +80,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888, ++ uint8_t, 1, uint32_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +@@ -281,6 +283,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), ++ PIXMAN_STD_FAST_PATH (ADD, solid, 
a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch new file mode 100644 index 0000000000..0caa29d266 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch @@ -0,0 +1,72 @@ +From f6843e3797eea7e4aed7614b1086f5cefc06c0f9 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 29 Nov 2010 03:31:32 +0200 +Subject: [PATCH 14/24] ARM: added 'neon_composite_add_8888_n_8888' fast path + +--- + pixman/pixman-arm-neon-asm.S | 26 ++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 30 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index dd6f2c5..2c0fd37 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1657,6 +1657,32 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_add_8888_n_8888_init ++ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) ++ vld1.32 {d27[0]}, [DUMMY] ++ vdup.8 d27, d27[3] ++.endm ++ ++.macro pixman_composite_add_8888_n_8888_cleanup ++.endm ++ ++generate_composite_function \ ++ pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ ++ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_add_8888_n_8888_init, \ ++ pixman_composite_add_8888_n_8888_cleanup, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_head, \ ++ 
pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ ++ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 27 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 5ad58bd..f0dc111 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -89,6 +89,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888, ++ uint32_t, 1, uint32_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +@@ -291,6 +293,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), ++ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch 
b/recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch new file mode 100644 index 0000000000..5f2448191d --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch @@ -0,0 +1,153 @@ +From af7a69d90ea2b43a4e850870727723d719f09a1c Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 29 Nov 2010 09:00:46 +0200 +Subject: [PATCH 15/24] ARM: added flags parameter to some asm fast path wrapper macros + +Not all types of operations can be skipped when having transparent +solid source or transparent solid mask. Add an extra flags parameter +for providing this information to the wrappers. +--- + pixman/pixman-arm-common.h | 15 +++++++++------ + pixman/pixman-arm-neon.c | 26 +++++++++++++------------- + pixman/pixman-arm-simd.c | 4 ++-- + 3 files changed, 24 insertions(+), 21 deletions(-) + +diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h +index 2cff6c8..66f448d 100644 +--- a/pixman/pixman-arm-common.h ++++ b/pixman/pixman-arm-common.h +@@ -47,6 +47,9 @@ + * or mask), the corresponding stride argument is unused. 
+ */ + ++#define SKIP_ZERO_SRC 1 ++#define SKIP_ZERO_MASK 2 ++ + #define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +@@ -87,7 +90,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + src_line, src_stride); \ + } + +-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \ ++#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ + dst_type, dst_cnt) \ + void \ + pixman_composite_##name##_asm_##cputype (int32_t w, \ +@@ -117,7 +120,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + \ + src = _pixman_image_get_solid (src_image, dst_image->bits.format); \ + \ +- if (src == 0) \ ++ if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ +@@ -128,7 +131,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + src); \ + } + +-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \ ++#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ + void \ +@@ -163,7 +166,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + \ + src = _pixman_image_get_solid (src_image, dst_image->bits.format); \ + \ +- if (src == 0) \ ++ if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ +@@ -177,7 +180,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + mask_line, mask_stride); \ + } + +-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \ ++#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ + void \ +@@ -211,7 +214,7 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ + \ + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\ + \ +- if (mask == 0) \ ++ if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, 
dst_type, \ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index f0dc111..1a3741c 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -63,33 +63,33 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) + +-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565, ++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888, ++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888, ++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) + +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565, 
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565, ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888, ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, +diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c +index 3b05007..dc2f471 100644 +--- a/pixman/pixman-arm-simd.c ++++ b/pixman/pixman-arm-simd.c +@@ -381,10 +381,10 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, uint32_t, 1) + +-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888, ++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888, ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) + + PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch new file mode 100644 index 0000000000..8a22f54451 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch @@ -0,0 +1,97 @@ +From 733f68912f4a44c24ad3973049a7e1d98f4c6ea8 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 29 Nov 2010 09:11:29 +0200 +Subject: [PATCH 16/24] ARM: added 'neon_composite_in_n_8' fast path + +--- + pixman/pixman-arm-neon-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 3 ++ + 2 files changed, 55 insertions(+), 
0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 2c0fd37..cf014fa 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1427,6 +1427,58 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_in_n_8_process_pixblock_head ++ /* expecting source data in {d0, d1, d2, d3} */ ++ /* and destination data in {d4, d5, d6, d7} */ ++ vmull.u8 q8, d4, d3 ++ vmull.u8 q9, d5, d3 ++ vmull.u8 q10, d6, d3 ++ vmull.u8 q11, d7, d3 ++.endm ++ ++.macro pixman_composite_in_n_8_process_pixblock_tail ++ vrshr.u16 q14, q8, #8 ++ vrshr.u16 q15, q9, #8 ++ vrshr.u16 q12, q10, #8 ++ vrshr.u16 q13, q11, #8 ++ vraddhn.u16 d28, q8, q14 ++ vraddhn.u16 d29, q9, q15 ++ vraddhn.u16 d30, q10, q12 ++ vraddhn.u16 d31, q11, q13 ++.endm ++ ++.macro pixman_composite_in_n_8_process_pixblock_tail_head ++ pixman_composite_in_n_8_process_pixblock_tail ++ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! ++ cache_preload 32, 32 ++ pixman_composite_in_n_8_process_pixblock_head ++ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! 
++.endm ++ ++.macro pixman_composite_in_n_8_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vld1.32 {d3[0]}, [DUMMY] ++ vdup.8 d3, d3[3] ++.endm ++ ++.macro pixman_composite_in_n_8_cleanup ++.endm ++ ++generate_composite_function \ ++ pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ ++ FLAG_DST_READWRITE, \ ++ 32, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_in_n_8_init, \ ++ pixman_composite_in_n_8_cleanup, \ ++ pixman_composite_in_n_8_process_pixblock_head, \ ++ pixman_composite_in_n_8_process_pixblock_tail, \ ++ pixman_composite_in_n_8_process_pixblock_tail_head, \ ++ 28, /* dst_w_basereg */ \ ++ 4, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 24 /* mask_basereg */ ++ + .macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 1a3741c..e3eca2b 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -69,6 +69,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, ++ uint8_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +@@ -298,6 +300,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), ++ PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, 
neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.21.2/0000-Add-pixman_bits_override_accessors.patch b/recipes/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch index 1687bd4d06..a8148d9542 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0000-Add-pixman_bits_override_accessors.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch @@ -1,7 +1,7 @@ -From fff598814365037c8ffdd97afe10a14bb09558fc Mon Sep 17 00:00:00 2001 +From 6593d86679fde724e49efa96b16ca22d9521b288 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 10 Dec 2009 00:51:50 +0200 -Subject: [PATCH 1/8] add _pixman_bits_override_accessors +Subject: [PATCH 17/24] add _pixman_bits_override_accessors * from patch ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline * used in @@ -71,5 +71,5 @@ index 383748a..969dfab 100644 int x, int y, -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch index d438dfce5e..5b1c1089ed 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch @@ -1,7 +1,7 @@ -From 6b162fb9d4ede5faa25f24188964f31d7667e74e Mon Sep 17 00:00:00 2001 +From 8e8b2809b505486001dc213becab0d50bfd96c1b Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Tue, 16 Mar 2010 16:55:28 +0100 -Subject: [PATCH 1/7] Generic C implementation of pixman_blt with overlapping support +Subject: [PATCH 18/24] Generic C implementation of pixman_blt with overlapping support Uses memcpy/memmove functions to copy pixels, can handle the case when 
both source and destination areas are in the same @@ -110,5 +110,5 @@ index 969dfab..352bceb 100644 + #endif /* PIXMAN_PRIVATE_H */ -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch index e86e8ed193..5193d38f74 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch @@ -1,7 +1,7 @@ -From 2d6cc769d233c0b1a391b501e84f5c3b0f1af4f8 Mon Sep 17 00:00:00 2001 +From f5a54f7d5eb1169bc79f0e445e2998e98080ef13 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 22 Oct 2009 05:45:47 +0300 -Subject: [PATCH 2/7] Support of overlapping src/dst for pixman_blt_mmx +Subject: [PATCH 19/24] Support of overlapping src/dst for pixman_blt_mmx --- pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++--------------------- @@ -87,5 +87,5 @@ index 34637a4..f9dd473 100644 while (w >= 2 && ((unsigned long)d & 3)) { -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch index 6fdfa5df22..f5c0e12f24 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch @@ -1,7 +1,7 @@ -From 532b8f45cee61ea2509a7f263dd30f40f3de29ba Mon Sep 17 00:00:00 2001 +From c8755294fa9ea396f7113370230b17c424a93be1 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 22 Oct 2009 05:45:54 +0300 -Subject: [PATCH 3/7] Support of overlapping src/dst for pixman_blt_sse2 +Subject: [PATCH 20/24] Support of overlapping src/dst for 
pixman_blt_sse2 --- pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- @@ -87,5 +87,5 @@ index 5907de0..25015ae 100644 while (w >= 2 && ((unsigned long)d & 3)) { -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch index 4950dd8479..0eb9d88eba 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch @@ -1,17 +1,17 @@ -From 4e101b976fa5fc72e44553a15516b804ffda0394 Mon Sep 17 00:00:00 2001 +From 86c8198598ef6d639e656c04644015795cc249aa Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Wed, 18 Nov 2009 06:08:48 +0200 -Subject: [PATCH 4/7] Support of overlapping src/dst for pixman_blt_neon +Subject: [PATCH 21/24] Support of overlapping src/dst for pixman_blt_neon --- pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++-------- 1 files changed, 51 insertions(+), 11 deletions(-) diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index 2f82069..6a6ed37 100644 +index e3eca2b..74316a8 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -185,26 +185,66 @@ pixman_blt_neon (uint32_t *src_bits, +@@ -199,26 +199,66 @@ pixman_blt_neon (uint32_t *src_bits, int width, int height) { @@ -90,5 +90,5 @@ index 2f82069..6a6ed37 100644 default: return FALSE; -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch b/recipes/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch index 459c73481b..129c1f1bb6 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch +++ 
b/recipes/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch @@ -1,7 +1,7 @@ -From 8d7a77b6780af1b96db32026fb8d79c5603f0fba Mon Sep 17 00:00:00 2001 +From 60d972afbae8613d700d3a6b3cb107429d7e11c6 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 10 Dec 2009 00:51:50 +0200 -Subject: [PATCH 5/7] ARM: added NEON optimizations for fetch/store r5g6b5 scanline +Subject: [PATCH 22/24] ARM: added NEON optimizations for fetch/store r5g6b5 scanline --- pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++ @@ -9,7 +9,7 @@ Subject: [PATCH 5/7] ARM: added NEON optimizations for fetch/store r5g6b5 scanli 2 files changed, 60 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index 91ec27d..b838f92 100644 +index cf014fa..25f7bf0 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -459,6 +459,16 @@ generate_composite_function \ @@ -47,10 +47,10 @@ index 91ec27d..b838f92 100644 .macro pixman_composite_add_8_8_process_pixblock_head diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index 6a6ed37..4b2bbea 100644 +index 74316a8..f773e92 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -422,6 +422,42 @@ BIND_COMBINE_U (over) +@@ -448,6 +448,42 @@ BIND_COMBINE_U (over) BIND_COMBINE_U (add) BIND_COMBINE_U (out_reverse) @@ -93,7 +93,7 @@ index 6a6ed37..4b2bbea 100644 pixman_implementation_t * _pixman_implementation_create_arm_neon (void) { -@@ -437,6 +473,10 @@ _pixman_implementation_create_arm_neon (void) +@@ -463,6 +499,10 @@ _pixman_implementation_create_arm_neon (void) imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; @@ -105,5 +105,5 @@ index 6a6ed37..4b2bbea 100644 imp->fill = arm_neon_fill; -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch 
b/recipes/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch index c3171474e2..7724f5433e 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch @@ -1,7 +1,7 @@ -From b689ddce66ce6391b6478d870f00fe21bbce944c Mon Sep 17 00:00:00 2001 +From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 23 Sep 2010 21:10:56 +0300 -Subject: [PATCH 6/7] ARM: added NEON optimizations for fetch/store a8 scanline +Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline --- pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++ @@ -9,7 +9,7 @@ Subject: [PATCH 6/7] ARM: added NEON optimizations for fetch/store a8 scanline 2 files changed, 106 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index b838f92..8e43a3b 100644 +index 25f7bf0..439b06b 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -418,6 +418,70 @@ generate_composite_function \ @@ -84,10 +84,10 @@ index b838f92..8e43a3b 100644 vshll.u8 q8, d1, #8 vshll.u8 q14, d2, #8 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index 4b2bbea..1c68d32 100644 +index f773e92..55219b3 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -458,6 +458,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image, +@@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image, pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); } @@ -133,7 +133,7 @@ index 4b2bbea..1c68d32 100644 pixman_implementation_t * _pixman_implementation_create_arm_neon (void) { -@@ -476,6 +515,9 @@ _pixman_implementation_create_arm_neon (void) +@@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void) 
_pixman_bits_override_accessors (PIXMAN_r5g6b5, neon_fetch_scanline_r5g6b5, neon_store_scanline_r5g6b5); @@ -144,5 +144,5 @@ index 4b2bbea..1c68d32 100644 imp->blt = arm_neon_blt; imp->fill = arm_neon_fill; -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.21.2/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch b/recipes/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch index 1abd28a16b..8253f41b8f 100644 --- a/recipes/xorg-lib/pixman-0.21.2/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch +++ b/recipes/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch @@ -1,7 +1,7 @@ -From 912d7b4f79cda5dd828f5db7608314057a39338e Mon Sep 17 00:00:00 2001 +From cf3b8fdc53144ff62c4054996559d3a1a4d62b75 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Fri, 24 Sep 2010 18:22:44 +0300 -Subject: [PATCH 7/7] ARM: added NEON optimizations for fetching x8r8g8b8 scanline +Subject: [PATCH 24/24] ARM: added NEON optimizations for fetching x8r8g8b8 scanline --- pixman/pixman-arm-neon-asm.S | 14 ++++++++++++++ @@ -9,10 +9,10 @@ Subject: [PATCH 7/7] ARM: added NEON optimizations for fetching x8r8g8b8 scanlin 2 files changed, 35 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index 8e43a3b..5ebee5a 100644 +index 439b06b..3e0dcfe 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S -@@ -1206,6 +1206,20 @@ generate_composite_function \ +@@ -1257,6 +1257,20 @@ generate_composite_function \ 0, /* src_basereg */ \ 0 /* mask_basereg */ @@ -34,10 +34,10 @@ index 8e43a3b..5ebee5a 100644 .macro pixman_composite_over_n_8_8888_process_pixblock_head diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index 1c68d32..0bcfc54 100644 +index 55219b3..8cef414 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -496,6 +496,24 @@ neon_store_scanline_a8 
(bits_image_t * image, +@@ -522,6 +522,24 @@ neon_store_scanline_a8 (bits_image_t * image, pixman_store_scanline_a8_asm_neon (width, pixel, values); } @@ -62,7 +62,7 @@ index 1c68d32..0bcfc54 100644 pixman_implementation_t * _pixman_implementation_create_arm_neon (void) -@@ -518,6 +536,9 @@ _pixman_implementation_create_arm_neon (void) +@@ -544,6 +562,9 @@ _pixman_implementation_create_arm_neon (void) _pixman_bits_override_accessors (PIXMAN_a8, neon_fetch_scanline_a8, neon_store_scanline_a8); @@ -73,5 +73,5 @@ index 1c68d32..0bcfc54 100644 imp->blt = arm_neon_blt; imp->fill = arm_neon_fill; -- -1.7.3.2 +1.6.6.1 diff --git a/recipes/xorg-lib/pixman_0.21.2.bb b/recipes/xorg-lib/pixman_0.21.2.bb index 7e361b66f3..19394d635b 100644 --- a/recipes/xorg-lib/pixman_0.21.2.bb +++ b/recipes/xorg-lib/pixman_0.21.2.bb @@ -3,17 +3,32 @@ require pixman.inc SRC_URI[archive.md5sum] = "9e09fd6e58cbf9717140891e0b7d4a7a" SRC_URI[archive.sha256sum] = "295f51416caf307ff7caf1153ee9b1d86b9f7f02a7876d12db6538d80451c5de" -PR = "${INC_PR}.0" +PR = "${INC_PR}.1" SRC_URI += "\ - file://0000-Add-pixman_bits_override_accessors.patch \ - file://0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ - file://0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ - file://0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ - file://0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ - file://0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \ - file://0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \ - file://0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \ + file://0002-Fix-argument-quoting-for-AC_INIT.patch \ + file://0003-Sun-s-copyrights-belong-to-Oracle-now.patch \ + file://0004-C-fast-path-for-a1-fill-operation.patch \ + file://0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch \ + file://0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch \ + 
file://0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch \ + file://0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch \ + file://0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch \ + file://0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch \ + file://0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch \ + file://0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch \ + file://0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch \ + file://0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch \ + file://0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch \ + file://0016-ARM-added-neon_composite_in_n_8-fast-path.patch \ + file://0017-add-_pixman_bits_override_accessors.patch \ + file://0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ + file://0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ + file://0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ + file://0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ + file://0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \ + file://0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \ + file://0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \ " NEON = " --disable-arm-neon " |