aboutsummaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch')
-rw-r--r-- meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch | 186
1 files changed, 0 insertions, 186 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch
deleted file mode 100644
index d6c94ba2c9..0000000000
--- a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Instruction-scheduling-of-bilinear-over_888.patch
+++ /dev/null
@@ -1,186 +0,0 @@
-From b9009d108277b42ebb4c0ea03eb3fb5845106497 Mon Sep 17 00:00:00 2001
-From: Taekyun Kim <tkq.kim@samsung.com>
-Date: Wed, 21 Sep 2011 15:52:13 +0900
-Subject: [PATCH 4/8] ARM: NEON: Instruction scheduling of bilinear over_8888_8888
-
-Instructions are reordered to eliminate pipeline stalls and get
-better memory access.
-
-Performance of before/after on cortex-a8 @ 1GHz
-
-<< 2000 x 2000 with scale factor close to 1.x >>
-before : 50.43 Mpix/s
-after : 61.09 Mpix/s
----
- pixman/pixman-arm-neon-asm-bilinear.S | 149 ++++++++++++++++++++++++++++++++-
- 1 files changed, 146 insertions(+), 3 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
-index 25bcb24..82d248e 100644
---- a/pixman/pixman-arm-neon-asm-bilinear.S
-+++ b/pixman/pixman-arm-neon-asm-bilinear.S
-@@ -893,15 +893,158 @@ pixman_asm_function fname
- .endm
-
- .macro bilinear_over_8888_8888_process_pixblock_head
-- bilinear_over_8888_8888_process_four_pixels
-+ mov TMP1, X, asr #16 /* head: start four pixels; loads and multiplies are interleaved with address math. Pixel 0: TMP1 = X >> 16 (presumably the integer part of a 16.16 coordinate, confirm) */
-+ add X, X, UX /* X += UX, so the next address is computed from the advanced X */
-+ add TMP1, TOP, TMP1, asl #2 /* TMP1 = TOP + TMP1 * 4 */
-+ mov TMP2, X, asr #16 /* pixel 1: same address computation into TMP2 */
-+ add X, X, UX
-+ add TMP2, TOP, TMP2, asl #2
-+
-+ vld1.32 {d22}, [TMP1], STRIDE /* d22 = 8 bytes at [TMP1]; post-increment TMP1 by STRIDE */
-+ vld1.32 {d23}, [TMP1] /* d23 = 8 bytes at the STRIDE-advanced address */
-+ mov TMP3, X, asr #16 /* pixel 2 address, issued between the loads and the multiplies */
-+ add X, X, UX
-+ add TMP3, TOP, TMP3, asl #2
-+ vmull.u8 q8, d22, d28 /* q8 = d22 * d28, u8 lanes widened to u16 */
-+ vmlal.u8 q8, d23, d29 /* q8 += d23 * d29 (d28/d29 look like the two row weights, confirm) */
-+
-+ vld1.32 {d22}, [TMP2], STRIDE /* pixel 1: first 8 bytes, then TMP2 += STRIDE */
-+ vld1.32 {d23}, [TMP2] /* pixel 1: 8 bytes one STRIDE further */
-+ mov TMP4, X, asr #16 /* pixel 3 address */
-+ add X, X, UX
-+ add TMP4, TOP, TMP4, asl #2
-+ vmull.u8 q9, d22, d28 /* q9 = d22 * d28 */
-+ vmlal.u8 q9, d23, d29 /* q9 += d23 * d29 */
-+
-+ vld1.32 {d22}, [TMP3], STRIDE /* pixel 2: first 8 bytes, then TMP3 += STRIDE */
-+ vld1.32 {d23}, [TMP3] /* pixel 2: 8 bytes one STRIDE further */
-+ vmull.u8 q10, d22, d28 /* q10 = d22 * d28 */
-+ vmlal.u8 q10, d23, d29 /* q10 += d23 * d29; left for the tail to finish */
-+
-+ vshll.u16 q0, d16, #8 /* pixel 0: q0 = d16 << 8, u16 lanes widened to u32 */
-+ vmlsl.u16 q0, d16, d30 /* q0 -= d16 * d30 */
-+ vmlal.u16 q0, d17, d30 /* q0 += d17 * d30, i.e. q0 = d16 * (256 - d30) + d17 * d30 */
-+
-+ pld [TMP4, PF_OFFS] /* prefetch hint for [TMP4 + PF_OFFS] */
-+ vld1.32 {d16}, [TMP4], STRIDE /* pixel 3: q8 (d16/d17) is reused now that q0 has been derived from it; TMP4 += STRIDE */
-+ vld1.32 {d17}, [TMP4] /* pixel 3: 8 bytes one STRIDE further */
-+ pld [TMP4, PF_OFFS] /* prefetch hint again, relative to the STRIDE-advanced TMP4 */
-+ vmull.u8 q11, d16, d28 /* q11 = d16 * d28 */
-+ vmlal.u8 q11, d17, d29 /* q11 += d17 * d29; left for the tail to finish */
-+
-+ vshll.u16 q1, d18, #8 /* pixel 1: q1 = d18 << 8 (d18/d19 are the halves of q9) */
-+ vmlsl.u16 q1, d18, d31 /* q1 -= d18 * d31 */
-+ vmlal.u16 q1, d19, d31 /* q1 += d19 * d31, i.e. q1 = d18 * (256 - d31) + d19 * d31 */
-+ vshr.u16 q15, q12, #8 /* q15 (d30/d31) = q12 >> 8 per u16 lane: the weights used by the next blends */
-+ vadd.u16 q12, q12, q13 /* q12 += q13 per u16 lane */
- .endm
-
- .macro bilinear_over_8888_8888_process_pixblock_tail
-+ vshll.u16 q2, d20, #8 /* tail: finish pixels 2 and 3, then combine four interpolated pixels with the four at OUT and store. Pixel 2: q2 = d20 << 8, widened to u32 */
-+ vmlsl.u16 q2, d20, d30 /* q2 -= d20 * d30 */
-+ vmlal.u16 q2, d21, d30 /* q2 += d21 * d30, i.e. q2 = d20 * (256 - d30) + d21 * d30 */
-+ vshll.u16 q3, d22, #8 /* pixel 3: q3 = d22 << 8 */
-+ vmlsl.u16 q3, d22, d31 /* q3 -= d22 * d31 */
-+ vmlal.u16 q3, d23, d31 /* q3 += d23 * d31 */
-+ vshrn.u32 d0, q0, #16 /* d0 = q0 >> 16, narrowed u32 -> u16 (pixel 0) */
-+ vshrn.u32 d1, q1, #16 /* d1 = q1 >> 16, narrowed (pixel 1); q0 now holds pixels 0 and 1 */
-+ vld1.32 {d2, d3}, [OUT, :128] /* load 16 bytes at OUT, address flagged as 128-bit aligned */
-+ pld [OUT, PF_OFFS] /* prefetch hint for [OUT + PF_OFFS] */
-+ vshrn.u32 d4, q2, #16 /* d4 = q2 >> 16, narrowed (pixel 2) */
-+ vshr.u16 q15, q12, #8 /* q15 (d30/d31) = q12 >> 8 per u16 lane */
-+ vshrn.u32 d5, q3, #16 /* d5 = q3 >> 16, narrowed (pixel 3); q2 now holds pixels 2 and 3 */
-+ vmovn.u16 d6, q0 /* d6 = low byte of each u16 lane of q0: pixels 0 and 1 as bytes */
-+ vmovn.u16 d7, q2 /* d7 = pixels 2 and 3 as bytes */
-+ vuzp.8 d6, d7 /* first byte de-interleave of the interpolated pixels */
-+ vuzp.8 d2, d3 /* same for the pixels loaded from OUT */
-+ vuzp.8 d6, d7 /* second pass: d6 = bytes 0 then bytes 1 of the four pixels, d7 = bytes 2 then bytes 3 */
-+ vuzp.8 d2, d3 /* same per-channel layout for the pixels loaded from OUT */
-+ vdup.32 d4, d7[1] /* d4 = byte 3 of the four interpolated pixels, repeated twice (presumably alpha, confirm) */
-+ vmvn.8 d4, d4 /* d4 = ~d4, i.e. 255 - value per byte */
-+ vmull.u8 q11, d2, d4 /* q11 = d2 * d4: bytes 0 and 1 of the OUT pixels scaled */
-+ vmull.u8 q2, d3, d4 /* q2 = d3 * d4: bytes 2 and 3 of the OUT pixels scaled */
-+ vrshr.u16 q1, q11, #8 /* q1 = q11 >> 8, rounded */
-+ vrshr.u16 q10, q2, #8 /* q10 = q2 >> 8, rounded (q10 is scratch here) */
-+ vraddhn.u16 d2, q1, q11 /* d2 = rounded high byte of (q1 + q11): with the vrshr above this approximates q11 / 255 */
-+ vraddhn.u16 d3, q10, q2 /* d3 = same for q2 */
-+ vqadd.u8 q3, q1, q3 /* q3 (d6/d7) = saturating add of the scaled OUT pixels (q1) and the interpolated pixels */
-+ vuzp.8 d6, d7 /* third of four de-interleaves on d6/d7 */
-+ vuzp.8 d6, d7 /* fourth: four passes are the identity, so bytes are back in packed pixel order */
-+ vadd.u16 q12, q12, q13 /* q12 += q13 per u16 lane */
-+ vst1.32 {d6, d7}, [OUT, :128]! /* store 16 bytes to OUT; writeback advances OUT by 16 */
- .endm
-
- .macro bilinear_over_8888_8888_process_pixblock_tail_head
-- bilinear_over_8888_8888_process_pixblock_tail
-- bilinear_over_8888_8888_process_pixblock_head
-+ vshll.u16 q2, d20, #8 /* tail_head: the tail of the current four pixels interleaved with the head of the next four. [tail] pixel 2: q2 = d20 << 8 */
-+ mov TMP1, X, asr #16 /* [head] next pixel 0: TMP1 = X >> 16 */
-+ add X, X, UX /* [head] X += UX */
-+ add TMP1, TOP, TMP1, asl #2 /* [head] TMP1 = TOP + TMP1 * 4 */
-+ vmlsl.u16 q2, d20, d30 /* [tail] q2 -= d20 * d30 (last read of d20) */
-+ mov TMP2, X, asr #16 /* [head] next pixel 1 address into TMP2 */
-+ add X, X, UX
-+ add TMP2, TOP, TMP2, asl #2
-+ vmlal.u16 q2, d21, d30 /* [tail] q2 += d21 * d30 (last read of d21) */
-+ vshll.u16 q3, d22, #8 /* [tail] pixel 3: q3 = d22 << 8 */
-+ vld1.32 {d20}, [TMP1], STRIDE /* [head] next pixel 0 into d20 (the head macro uses d22, still live here); TMP1 += STRIDE */
-+ vmlsl.u16 q3, d22, d31 /* [tail] q3 -= d22 * d31 */
-+ vmlal.u16 q3, d23, d31 /* [tail] q3 += d23 * d31 (d22/d23 are free after this) */
-+ vld1.32 {d21}, [TMP1] /* [head] next pixel 0: 8 bytes one STRIDE further */
-+ vmull.u8 q8, d20, d28 /* [head] q8 = d20 * d28 */
-+ vmlal.u8 q8, d21, d29 /* [head] q8 += d21 * d29 */
-+ vshrn.u32 d0, q0, #16 /* [tail] d0 = q0 >> 16, narrowed to u16 (pixel 0) */
-+ vshrn.u32 d1, q1, #16 /* [tail] d1 = q1 >> 16, narrowed (pixel 1) */
-+ vld1.32 {d2, d3}, [OUT, :128] /* [tail] load 16 bytes at OUT, address flagged as 128-bit aligned */
-+ pld [OUT, PF_OFFS] /* [tail] prefetch hint for [OUT + PF_OFFS] */
-+ vshrn.u32 d4, q2, #16 /* [tail] d4 = q2 >> 16, narrowed (pixel 2) */
-+ vshr.u16 q15, q12, #8 /* [tail] q15 (d30/d31) = q12 >> 8 per u16 lane */
-+ vld1.32 {d22}, [TMP2], STRIDE /* [head] next pixel 1: first 8 bytes, then TMP2 += STRIDE */
-+ vshrn.u32 d5, q3, #16 /* [tail] d5 = q3 >> 16, narrowed (pixel 3) */
-+ vmovn.u16 d6, q0 /* [tail] d6 = pixels 0 and 1 as bytes (last read of q0) */
-+ vld1.32 {d23}, [TMP2] /* [head] next pixel 1: 8 bytes one STRIDE further */
-+ vmull.u8 q9, d22, d28 /* [head] q9 = d22 * d28 */
-+ mov TMP3, X, asr #16 /* [head] next pixel 2 address into TMP3 */
-+ add X, X, UX
-+ add TMP3, TOP, TMP3, asl #2
-+ mov TMP4, X, asr #16 /* [head] next pixel 3 address into TMP4 */
-+ add X, X, UX
-+ add TMP4, TOP, TMP4, asl #2
-+ vmlal.u8 q9, d23, d29 /* [head] q9 += d23 * d29 */
-+ vmovn.u16 d7, q2 /* [tail] d7 = pixels 2 and 3 as bytes */
-+ vld1.32 {d22}, [TMP3], STRIDE /* [head] next pixel 2: first 8 bytes, then TMP3 += STRIDE */
-+ vuzp.8 d6, d7 /* [tail] first byte de-interleave of the interpolated pixels */
-+ vuzp.8 d2, d3 /* [tail] same for the pixels loaded from OUT */
-+ vuzp.8 d6, d7 /* [tail] second pass: d7 = bytes 2 then bytes 3 of the four pixels */
-+ vuzp.8 d2, d3 /* [tail] same per-channel layout for the pixels loaded from OUT */
-+ vdup.32 d4, d7[1] /* [tail] d4 = byte 3 of the four interpolated pixels, repeated twice (presumably alpha, confirm) */
-+ vld1.32 {d23}, [TMP3] /* [head] next pixel 2: 8 bytes one STRIDE further */
-+ vmvn.8 d4, d4 /* [tail] d4 = ~d4, i.e. 255 - value per byte */
-+ vmull.u8 q10, d22, d28 /* [head] q10 = d22 * d28 */
-+ vmlal.u8 q10, d23, d29 /* [head] q10 += d23 * d29 (q11 is free for scratch after this) */
-+ vmull.u8 q11, d2, d4 /* [tail] q11 = d2 * d4: bytes 0 and 1 of the OUT pixels scaled */
-+ vmull.u8 q2, d3, d4 /* [tail] q2 = d3 * d4: bytes 2 and 3 of the OUT pixels scaled */
-+ vshll.u16 q0, d16, #8 /* [head] next pixel 0: q0 = d16 << 8 */
-+ vmlsl.u16 q0, d16, d30 /* [head] q0 -= d16 * d30 */
-+ vrshr.u16 q1, q11, #8 /* [tail] q1 = q11 >> 8, rounded */
-+ vmlal.u16 q0, d17, d30 /* [head] q0 += d17 * d30 (last read of q8) */
-+ vrshr.u16 q8, q2, #8 /* [tail] q8 = q2 >> 8, rounded; q8 is the scratch because q10 already holds the next pixel 2 */
-+ vraddhn.u16 d2, q1, q11 /* [tail] d2 = rounded high byte of (q1 + q11): approximates q11 / 255 */
-+ vraddhn.u16 d3, q8, q2 /* [tail] d3 = same for q2 */
-+ pld [TMP4, PF_OFFS] /* [head] prefetch hint for [TMP4 + PF_OFFS] */
-+ vld1.32 {d16}, [TMP4], STRIDE /* [head] next pixel 3: first 8 bytes, then TMP4 += STRIDE */
-+ vqadd.u8 q3, q1, q3 /* [tail] q3 (d6/d7) = saturating add of the scaled OUT pixels (q1) and the interpolated pixels */
-+ vld1.32 {d17}, [TMP4] /* [head] next pixel 3: 8 bytes one STRIDE further */
-+ pld [TMP4, PF_OFFS] /* [head] prefetch hint again, relative to the STRIDE-advanced TMP4 */
-+ vmull.u8 q11, d16, d28 /* [head] q11 = d16 * d28 */
-+ vmlal.u8 q11, d17, d29 /* [head] q11 += d17 * d29 */
-+ vuzp.8 d6, d7 /* [tail] third of four de-interleaves on d6/d7 */
-+ vshll.u16 q1, d18, #8 /* [head] next pixel 1: q1 = d18 << 8 (q1 is free after the vqadd) */
-+ vuzp.8 d6, d7 /* [tail] fourth: bytes are back in packed pixel order */
-+ vmlsl.u16 q1, d18, d31 /* [head] q1 -= d18 * d31 */
-+ vadd.u16 q12, q12, q13 /* [tail] q12 += q13 per u16 lane */
-+ vmlal.u16 q1, d19, d31 /* [head] q1 += d19 * d31 */
-+ vshr.u16 q15, q12, #8 /* [head] q15 (d30/d31) = q12 >> 8 per u16 lane */
-+ vadd.u16 q12, q12, q13 /* [head] q12 += q13 per u16 lane */
-+ vst1.32 {d6, d7}, [OUT, :128]! /* [tail] store 16 bytes to OUT; writeback advances OUT by 16 */
- .endm
-
- /* over_8888_8_8888 */
---
-1.6.6.1
-