aboutsummaryrefslogtreecommitdiffstats
path: root/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch')
-rw-r--r--recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch131
1 files changed, 131 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
new file mode 100644
index 0000000000..115d5170c6
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
@@ -0,0 +1,131 @@
+From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 7 Mar 2011 03:10:43 +0200
+Subject: [PATCH 14/40] ARM: common macro for nearest scaling fast paths
+
+The code of nearest scaled 'src_0565_0565' function was generalized
+and moved to a common macro, so that it can be reused for other
+fast paths.
+---
+ pixman/pixman-arm-simd-asm.S | 60 +++++++++++++++++++++++++----------------
+ 1 files changed, 36 insertions(+), 24 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index dd1366d..a9775e2 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ .endfunc
+
+ /*
+- * Note: This function is only using armv4t instructions (not even armv6),
++ * Note: This code is only using armv5te instructions (not even armv6),
+ * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+ * be split into a few variants, tuned for each microarchitecture.
+ *
+ * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+ * have efficient write combining), it needs to be changed to use 16-byte
+ * aligned writes using STM instruction.
++ *
++ * Nearest scanline scaler macro template uses the following arguments:
++ * fname - name of the function to generate
++ * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
++ * t - type suffix for LDR/STR instructions
++ * prefetch_distance - prefetch in the source image by that many
++ * pixels ahead
++ * prefetch_braking_distance - stop prefetching when that many pixels are
++ * remaining before the end of scanline
+ */
+-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
++
++.macro generate_nearest_scanline_func fname, bpp_shift, t, \
++ prefetch_distance, \
++ prefetch_braking_distance
++
++pixman_asm_function fname
+ W .req r0
+ DST .req r1
+ SRC .req r2
+@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+
+ ldr UNIT_X, [sp]
+ push {r4, r5, r6, r7}
+- mvn VXMASK, #1
++ mvn VXMASK, #((1 << bpp_shift) - 1)
+
+ /* define helper macro */
+ .macro scale_2_pixels
+- ldrh TMP1, [SRC, TMP1]
+- and TMP2, VXMASK, VX, lsr #15
++ ldr&t TMP1, [SRC, TMP1]
++ and TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- strh TMP1, [DST], #2
++ str&t TMP1, [DST], #(1 << bpp_shift)
+
+- ldrh TMP2, [SRC, TMP2]
+- and TMP1, VXMASK, VX, lsr #15
++ ldr&t TMP2, [SRC, TMP2]
++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- strh TMP2, [DST], #2
++ str&t TMP2, [DST], #(1 << bpp_shift)
+ .endm
+
+- /*
+- * stop prefetch before reaching the end of scanline (a good behaving
+- * value selected based on some benchmarks with short scanlines)
+- */
+- #define PREFETCH_BRAKING_DISTANCE 32
+-
+ /* now do the scaling */
+- and TMP1, VXMASK, VX, lsr #15
++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
++ subs W, W, #(8 + prefetch_braking_distance)
+ blt 2f
+- /* set prefetch distance to 80 pixels ahead */
+- add PF_OFFS, VX, UNIT_X, lsl #6
+- add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++ /* calculate prefetch offset */
++ mov PF_OFFS, #prefetch_distance
++ mla PF_OFFS, UNIT_X, PF_OFFS, VX
+ 1: /* main loop, process 8 pixels per iteration with prefetch */
+ subs W, W, #8
+ add PF_OFFS, UNIT_X, lsl #3
+@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ scale_2_pixels
+ scale_2_pixels
+ scale_2_pixels
+- pld [SRC, PF_OFFS, lsr #15]
++ pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)]
+ bge 1b
+ 2:
+- subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
++ subs W, W, #(4 - 8 - prefetch_braking_distance)
+ blt 2f
+ 1: /* process the remaining pixels */
+ scale_2_pixels
+@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ scale_2_pixels
+ 2:
+ tst W, #1
+- ldrneh TMP1, [SRC, TMP1]
+- strneh TMP1, [DST], #2
++ ldrne&t TMP1, [SRC, TMP1]
++ strne&t TMP1, [DST]
+ /* cleanup helper macro */
+ .purgem scale_2_pixels
+ .unreq DST
+@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ pop {r4, r5, r6, r7}
+ bx lr
+ .endfunc
++.endm
++
++generate_nearest_scanline_func \
++ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
+--
+1.6.6.1
+