aboutsummaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch')
-rw-r--r-- meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch 172
1 files changed, 172 insertions, 0 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch
new file mode 100644
index 0000000000..6efdb621ad
--- /dev/null
+++ b/meta-oe/recipes-graphics/xorg-lib/pixman-0.20.0/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch
@@ -0,0 +1,172 @@
+From e1191ad6563a1fb02a45982b1c4d7fed3c655e97 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 4 Oct 2010 01:56:59 +0300
+Subject: [PATCH 8/8] ARM optimization for scaled src_0565_0565 operation with nearest filter
+
+The code actually uses only armv4t instructions.
+
+Benchmark from ARM11:
+
+ == before ==
+ op=1, src_fmt=10020565, dst_fmt=10020565, speed=34.86 MPix/s
+
+ == after ==
+ op=1, src_fmt=10020565, dst_fmt=10020565, speed=36.62 MPix/s
+
+Benchmark from ARM Cortex-A8:
+
+ == before ==
+ op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
+
+ == after ==
+ op=1, src_fmt=10020565, dst_fmt=10020565, speed=94.91 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-simd.c | 37 +++++++++++++++++++++++
+ 2 files changed, 103 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index a3d2d40..b6f69db 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -1,5 +1,6 @@
+ /*
+ * Copyright © 2008 Mozilla Corporation
++ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+@@ -328,3 +329,68 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+ .endfunc
++
++/*
++ * Note: This function is actually primarily optimized for ARM Cortex-A8
++ * pipeline. In order to get good performance on ARM9/ARM11 cores (which
++ * don't have efficient write combining), it needs to be changed to use
++ * 16-byte aligned writes using STM instruction.
++ */
++pixman_asm_function pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6
++ DST .req r0 /* destination pointer, post-incremented by 2 for every stored pixel */
++ SRC .req r1 /* source base pointer, never modified */
++ W .req r2 /* pixel counter; after the main loop only its bits 1 and 0 are tested */
++ VX .req r3 /* source position accumulator, advanced by UNIT_X once per pixel */
++ UNIT_X .req r12 /* per-pixel increment of VX, loaded from the stack below */
++ TMP1 .req r4 /* alternately holds a source byte offset and the pixel loaded from it */
++ TMP2 .req r5 /* same role as TMP1, for the other pixel of a pair */
++ MASK .req r6 /* constant ~1, used to clear bit 0 of a byte offset */
++ ldr UNIT_X, [sp] /* read before the push moves sp; corresponds to unit_x, the fifth argument of the C prototype */
++ push {r4, r5, r6, r7} /* NOTE(review): r7 is not referenced in this function; presumably pushed to keep sp 8-byte aligned - confirm */
++ mvn MASK, #1 /* MASK = ~1 */
++
++ /* define helper macro: expects TMP1 = byte offset of the next source pixel, copies 2 pixels, leaves TMP1 = offset of the pixel after them */
++ .macro scale_2_pixels
++ ldrh TMP1, [SRC, TMP1] /* load first pixel from the precomputed offset */
++ and TMP2, MASK, VX, lsr #15 /* offset of second pixel: (VX >> 15) & ~1, i.e. (VX >> 16) * 2 */
++ add VX, VX, UNIT_X /* step the source position */
++ strh TMP1, [DST], #2 /* store first pixel, DST += 2 */
++
++ ldrh TMP2, [SRC, TMP2] /* load second pixel */
++ and TMP1, MASK, VX, lsr #15 /* offset for the next pixel, consumed by whatever runs after this macro */
++ add VX, VX, UNIT_X /* step the source position */
++ strh TMP2, [DST], #2 /* store second pixel, DST += 2 */
++ .endm
++
++ /* now do the scaling */
++ and TMP1, MASK, VX, lsr #15 /* prime TMP1 with the byte offset of the first source pixel */
++ add VX, VX, UNIT_X /* VX now refers to the second pixel */
++ subs W, #4 /* W -= 4; a negative result means fewer than 4 pixels to do */
++ blt 2f /* in that case skip the main loop */
++1: /* main loop, process 4 pixels per iteration */
++ scale_2_pixels
++ scale_2_pixels
++ subs W, W, #4 /* W -= 4 */
++ bge 1b /* repeat while at least 4 more pixels remain */
++2: /* W is negative here; subtracting multiples of 4 left its bits 1 and 0 equal to those of the remaining pixel count */
++ tst W, #2 /* bit 1 set: two more pixels to copy */
++ beq 2f
++ scale_2_pixels
++2:
++ tst W, #1 /* bit 0 set: one last pixel to copy */
++ ldrneh TMP1, [SRC, TMP1] /* executed only when the tst above found bit 0 set */
++ strneh TMP1, [DST], #2 /* executed only when the tst above found bit 0 set */
++ /* cleanup helper macro */
++ .purgem scale_2_pixels
++ .unreq DST
++ .unreq SRC
++ .unreq W
++ .unreq VX
++ .unreq UNIT_X
++ .unreq TMP1
++ .unreq TMP2
++ .unreq MASK
++ /* return */
++ pop {r4, r5, r6, r7} /* restore the registers pushed on entry */
++ bx lr
++.endfunc
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index d466a31..f6f464c 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -29,6 +29,7 @@
+
+ #include "pixman-private.h"
+ #include "pixman-arm-common.h"
++#include "pixman-fast-path.h"
+
+ #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
+@@ -375,6 +376,35 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
+
+ #endif
+
++void /* prototype of the routine defined in pixman-arm-simd-asm.S */
++pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (uint16_t * dst, /* output pixels, written sequentially */
++ uint16_t * src, /* source scanline, read at offsets derived from vx */
++ int32_t w, /* number of pixels to write */
++ pixman_fixed_t vx, /* starting source position; the assembly uses (vx >> 16) as the source pixel index */
++ pixman_fixed_t unit_x); /* amount added to vx after each pixel */
++
++static force_inline void /* adapter: forwards one scanline to the assembly routine */
++scaled_nearest_scanline_armv6_565_565_SRC (uint16_t * dst,
++ uint16_t * src,
++ int32_t w,
++ pixman_fixed_t vx,
++ pixman_fixed_t unit_x,
++ pixman_fixed_t max_vx) /* max_vx is accepted but not used: the assembly routine takes no such argument */
++{
++ pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (dst, src, w, /* first five arguments passed through unchanged */
++ vx, unit_x);
++}
++
++FAST_NEAREST_MAINLOOP (armv6_565_565_cover_SRC, /* NOTE(review): macro presumably comes from pixman-fast-path.h and emits a main loop with this name - confirm */
++ scaled_nearest_scanline_armv6_565_565_SRC, /* per-scanline worker used by the generated loop */
++ uint16_t, uint16_t, COVER); /* two uint16_t type arguments, then the COVER variant selector */
++FAST_NEAREST_MAINLOOP (armv6_565_565_none_SRC, /* same worker, NONE variant */
++ scaled_nearest_scanline_armv6_565_565_SRC,
++ uint16_t, uint16_t, NONE);
++FAST_NEAREST_MAINLOOP (armv6_565_565_pad_SRC, /* same worker, PAD variant */
++ scaled_nearest_scanline_armv6_565_565_SRC,
++ uint16_t, uint16_t, PAD);
++
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
+ uint8_t, 1, uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+@@ -404,6 +434,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
+
++ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, r5g6b5, armv6_565_565),
++ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, b5g6r5, armv6_565_565),
++ SIMPLE_NEAREST_FAST_PATH_NONE (SRC, r5g6b5, r5g6b5, armv6_565_565),
++ SIMPLE_NEAREST_FAST_PATH_NONE (SRC, b5g6r5, b5g6r5, armv6_565_565),
++ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, r5g6b5, armv6_565_565),
++ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, b5g6r5, armv6_565_565),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+