about | summary | refs | log | tree | commit | diff | stats
path: root/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
blob: 115d5170c6c618686db1a0a1baceb998a3942cce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Mon, 7 Mar 2011 03:10:43 +0200
Subject: [PATCH 14/40] ARM: common macro for nearest scaling fast paths

The code of the nearest-scaled 'src_0565_0565' function has been
generalized and moved into a common macro, so that it can be reused
for other fast paths.
---
 pixman/pixman-arm-simd-asm.S |   60 +++++++++++++++++++++++++----------------
 1 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index dd1366d..a9775e2 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
 .endfunc
 
 /*
- * Note: This function is only using armv4t instructions (not even armv6),
+ * Note: This code is only using armv5te instructions (not even armv6),
  *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
  *       be split into a few variants, tuned for each microarchitecture.
  *
  * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
  * have efficient write combining), it needs to be changed to use 16-byte
  * aligned writes using STM instruction.
+ *
+ * The nearest scanline scaler macro template takes the following arguments:
+ *  fname                     - name of the function to generate
+ *  bpp_shift                 - (1 << bpp_shift) is the size of a pixel in bytes
+ *  t                         - type suffix for LDR/STR instructions
+ *  prefetch_distance         - prefetch the source image that many
+ *                              pixels ahead
+ *  prefetch_braking_distance - stop prefetching when that many pixels
+ *                              remain before the end of the scanline
  */
-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+
+.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
+                                      prefetch_distance,        \
+                                      prefetch_braking_distance
+
+pixman_asm_function fname
 	W	.req	r0
 	DST	.req	r1
 	SRC	.req	r2
@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
 
 	ldr	UNIT_X, [sp]
 	push	{r4, r5, r6, r7}
-	mvn	VXMASK, #1
+	mvn	VXMASK, #((1 << bpp_shift) - 1)
 
 	/* define helper macro */
 	.macro	scale_2_pixels
-		ldrh	TMP1, [SRC, TMP1]
-		and	TMP2, VXMASK, VX, lsr #15
+		ldr&t	TMP1, [SRC, TMP1]
+		and	TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
 		add	VX, VX, UNIT_X
-		strh	TMP1, [DST], #2
+		str&t	TMP1, [DST], #(1 << bpp_shift)
 
-		ldrh	TMP2, [SRC, TMP2]
-		and	TMP1, VXMASK, VX, lsr #15
+		ldr&t	TMP2, [SRC, TMP2]
+		and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
 		add	VX, VX, UNIT_X
-		strh	TMP2, [DST], #2
+		str&t	TMP2, [DST], #(1 << bpp_shift)
 	.endm
 
-	/*
-	 * stop prefetch before reaching the end of scanline (a good behaving
-	 * value selected based on some benchmarks with short scanlines)
-	 */
-	#define PREFETCH_BRAKING_DISTANCE 32
-
 	/* now do the scaling */
-	and	TMP1, VXMASK, VX, lsr #15
+	and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
 	add	VX, VX, UNIT_X
-	subs	W, #(8 + PREFETCH_BRAKING_DISTANCE)
+	subs	W, W, #(8 + prefetch_braking_distance)
 	blt	2f
-	/* set prefetch distance to 80 pixels ahead */
-	add	PF_OFFS, VX, UNIT_X, lsl #6
-	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #4
+	/* calculate prefetch offset */
+	mov	PF_OFFS, #prefetch_distance
+	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
 1:	/* main loop, process 8 pixels per iteration with prefetch */
 	subs	W, W, #8
 	add	PF_OFFS, UNIT_X, lsl #3
@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
 	scale_2_pixels
 	scale_2_pixels
 	scale_2_pixels
-	pld	[SRC, PF_OFFS, lsr #15]
+	pld	[SRC, PF_OFFS, lsr #(16 - bpp_shift)]
 	bge	1b
 2:
-	subs	W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
+	subs	W, W, #(4 - 8 - prefetch_braking_distance)
 	blt	2f
 1:	/* process the remaining pixels */
 	scale_2_pixels
@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
 	scale_2_pixels
 2:
 	tst	W, #1
-	ldrneh	TMP1, [SRC, TMP1]
-	strneh	TMP1, [DST], #2
+	ldrne&t	TMP1, [SRC, TMP1]
+	strne&t	TMP1, [DST]
 	/* cleanup helper macro */
 	.purgem	scale_2_pixels
 	.unreq	DST
@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
 	pop	{r4, r5, r6, r7}
 	bx	lr
 .endfunc
+.endm
+
+generate_nearest_scanline_func \
+    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
-- 
1.6.6.1