aboutsummaryrefslogtreecommitdiff
path: root/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch')
-rw-r--r--recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch156
1 files changed, 156 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
new file mode 100644
index 0000000..82661f0
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
@@ -0,0 +1,156 @@
+From cd20ceb7602348ecbfa0db1756dc548a0bad3c9d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 17 Mar 2011 19:42:01 +0200
+Subject: [PATCH 34/40] ARM: support different levels of loop unrolling in bilinear scaler
+
+Now an extra 'flag' parameter is supported in bilinear scaline scaling
+function generation macro. It can be used to enable 4 or 8 pixels per
+loop iteration unrolling and provide save/restore code for d8-d15
+registers.
+---
+ pixman/pixman-arm-neon-asm.S | 84 ++++++++++++++++++++++++++++++++++++++----
+ 1 files changed, 76 insertions(+), 8 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9878bf7..6141770 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2633,6 +2633,36 @@ fname:
+ .endif
+ .endm
+
++.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
++.else
++ bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
++.else
++ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
++.else
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.set BILINEAR_FLAG_UNROLL_4, 0
++.set BILINEAR_FLAG_UNROLL_8, 1
++.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2
++
+ /*
+ * Main template macro for generating NEON optimized bilinear scanline
+ * functions.
+@@ -2648,7 +2678,7 @@ fname:
+
+ .macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+ src_bpp_shift, dst_bpp_shift, \
+- prefetch_distance
++ prefetch_distance, flags
+
+ pixman_asm_function fname
+ OUT .req r0
+@@ -2672,6 +2702,10 @@ pixman_asm_function fname
+ ldmia ip, {WB, X, UX, WIDTH}
+ mul PF_OFFS, PF_OFFS, UX
+
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++ vpush {d8-d15}
++.endif
++
+ sub STRIDE, BOTTOM, TOP
+ .unreq BOTTOM
+
+@@ -2705,8 +2739,34 @@ pixman_asm_function fname
+ bilinear_interpolate_two_pixels src_fmt, dst_fmt
+ sub WIDTH, WIDTH, #2
+ 0:
+-
+- /* start the main loop */
++.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
++/*********** 8 pixels per iteration *****************/
++ cmp WIDTH, #4
++ blt 0f
++ tst OUT, #(1 << (dst_bpp_shift + 2))
++ beq 0f
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++ sub WIDTH, WIDTH, #4
++0:
++ subs WIDTH, WIDTH, #8
++ blt 1f
++ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
++ bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #8
++ blt 5f
++0:
++ bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #8
++ bge 0b
++5:
++ bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++1:
++ tst WIDTH, #4
++ beq 2f
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++2:
++.else
++/*********** 4 pixels per iteration *****************/
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+@@ -2720,7 +2780,8 @@ pixman_asm_function fname
+ 5:
+ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ 1:
+-
++/****************************************************/
++.endif
+ /* handle the remaining trailing pixels */
+ tst WIDTH, #2
+ beq 2f
+@@ -2730,6 +2791,9 @@ pixman_asm_function fname
+ beq 3f
+ bilinear_interpolate_last_pixel src_fmt, dst_fmt
+ 3:
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++ vpop {d8-d15}
++.endif
+ pop {r4, r5, r6, r7, r8, r9}
+ bx lr
+
+@@ -2751,13 +2815,17 @@ pixman_asm_function fname
+ .endm
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 2, 28
++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
++ 2, 2, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 1, 28
++ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
++ 2, 1, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 2, 28
++ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
++ 1, 2, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 1, 28
++ pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \
++ 1, 1, 28, BILINEAR_FLAG_UNROLL_4
+--
+1.6.6.1
+