about summary refs log tree commit diff stats
path: root/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch')
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch | 466
1 files changed, 466 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
new file mode 100644
index 0000000000..c5dab5c31f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
@@ -0,0 +1,466 @@
+From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 01:29:02 +0200
+Subject: [PATCH 07/40] Main loop template for fast single pass bilinear scaling
+
+Can be used for implementing SIMD optimized fast path
+functions which work with bilinear scaled source images.
+
+Similar to the template for nearest scaling main loop, the
+following types of mask are supported:
+1. no mask
+2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+3. solid mask
+
+PAD repeat is fully supported. NONE repeat is partially
+supported (right now only works if source image has alpha
+channel or when alpha channel of the source image does not
+have any effect on the compositing operation).
+---
+ pixman/pixman-fast-path.h | 432 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 432 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
+index d081222..1885d47 100644
+--- a/pixman/pixman-fast-path.h
++++ b/pixman/pixman-fast-path.h
+@@ -587,4 +587,436 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
++/*****************************************************************************/
++
++/*
++ * Identify 5 zones in each scanline for bilinear scaling. The zones differ
++ * in whether the 2 pixels to be interpolated are fetched from the image
++ * itself, from the padding area around it, or from both image and padding area.
++ */
++static force_inline void
++bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
++ pixman_fixed_t vx,
++ pixman_fixed_t unit_x,
++ int32_t * left_pad,
++ int32_t * left_tz,
++ int32_t * width,
++ int32_t * right_tz,
++ int32_t * right_pad)
++{
++ int width1 = *width, left_pad1, right_pad1;
++ int width2 = *width, left_pad2, right_pad2;
++
++ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
++ &width1, &left_pad1, &right_pad1);
++ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
++ unit_x, &width2, &left_pad2, &right_pad2);
++
++ *left_pad = left_pad2;
++ *left_tz = left_pad1 - left_pad2;
++ *right_tz = right_pad2 - right_pad1;
++ *right_pad = right_pad1;
++ *width -= *left_pad + *left_tz + *right_tz + *right_pad;
++}
++
++/*
++ * Main loop template for single pass bilinear scaling. It needs to be
++ * provided with 'scanline_func' which should do the compositing operation.
++ * The needed function has the following prototype:
++ *
++ * scanline_func (dst_type_t * dst,
++ * const mask_type_t * mask,
++ * const src_type_t * src_top,
++ * const src_type_t * src_bottom,
++ * int32_t width,
++ * int weight_top,
++ * int weight_bottom,
++ * pixman_fixed_t vx,
++ * pixman_fixed_t unit_x,
++ * pixman_fixed_t max_vx,
++ * pixman_bool_t zero_src)
++ *
++ * Where:
++ * dst - destination scanline buffer for storing results
++ * mask - mask buffer (or single value for solid mask)
++ * src_top, src_bottom - two source scanlines
++ * width - number of pixels to process
++ * weight_top - weight of the top row for interpolation
++ * weight_bottom - weight of the bottom row for interpolation
++ * vx - initial position for fetching the first pair of
++ * pixels from the source buffer
++ * unit_x - position increment needed to move to the next pair
++ * of pixels
++ * max_vx - image size as a fixed point value, can be used for
++ * implementing NORMAL repeat (when it is supported)
++ * zero_src - boolean hint variable, which is set to TRUE when
++ * all source pixels are fetched from zero padding
++ * zone for NONE repeat
++ *
++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
++ * but sometimes it may be less than that for NONE repeat when handling
++ * fuzzy antialiased top or bottom image edges. Also, both top and
++ * bottom weight variables are guaranteed to have values in the 0-255
++ * range, so they fit into an unsigned byte and can be used with 8-bit
++ * SIMD multiplication instructions.
++ */
++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
++static void \
++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
++ pixman_op_t op, \
++ pixman_image_t * src_image, \
++ pixman_image_t * mask_image, \
++ pixman_image_t * dst_image, \
++ int32_t src_x, \
++ int32_t src_y, \
++ int32_t mask_x, \
++ int32_t mask_y, \
++ int32_t dst_x, \
++ int32_t dst_y, \
++ int32_t width, \
++ int32_t height) \
++{ \
++ dst_type_t *dst_line; \
++ mask_type_t *mask_line; \
++ src_type_t *src_first_line; \
++ int y1, y2; \
++ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
++ pixman_vector_t v; \
++ pixman_fixed_t vx, vy; \
++ pixman_fixed_t unit_x, unit_y; \
++ int32_t left_pad, left_tz, right_tz, right_pad; \
++ \
++ dst_type_t *dst; \
++ mask_type_t solid_mask; \
++ const mask_type_t *mask = &solid_mask; \
++ int src_stride, mask_stride, dst_stride; \
++ \
++ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
++ if (have_mask) \
++ { \
++ if (mask_is_solid) \
++ { \
++ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
++ mask_stride = 0; \
++ } \
++ else \
++ { \
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
++ mask_stride, mask_line, 1); \
++ } \
++ } \
++ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
++ * transformed from destination space to source space */ \
++ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
++ \
++ /* reference point is the center of the pixel */ \
++ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
++ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
++ v.vector[2] = pixman_fixed_1; \
++ \
++ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
++ return; \
++ \
++ unit_x = src_image->common.transform->matrix[0][0]; \
++ unit_y = src_image->common.transform->matrix[1][1]; \
++ \
++ v.vector[0] -= pixman_fixed_1 / 2; \
++ v.vector[1] -= pixman_fixed_1 / 2; \
++ \
++ vy = v.vector[1]; \
++ \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
++ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
++ { \
++ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
++ &left_pad, &left_tz, &width, &right_tz, &right_pad); \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
++ { \
++ /* PAD repeat does not need special handling for 'transition zones' and */ \
++ /* they can be combined with 'padding zones' safely */ \
++ left_pad += left_tz; \
++ right_pad += right_tz; \
++ left_tz = right_tz = 0; \
++ } \
++ v.vector[0] += left_pad * unit_x; \
++ } \
++ \
++ while (--height >= 0) \
++ { \
++ int weight1, weight2; \
++ dst = dst_line; \
++ dst_line += dst_stride; \
++ vx = v.vector[0]; \
++ if (have_mask && !mask_is_solid) \
++ { \
++ mask = mask_line; \
++ mask_line += mask_stride; \
++ } \
++ \
++ y1 = pixman_fixed_to_int (vy); \
++ weight2 = (vy >> 8) & 0xff; \
++ if (weight2) \
++ { \
++ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
++ y2 = y1 + 1; \
++ weight1 = 256 - weight2; \
++ } \
++ else \
++ { \
++ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
++ y2 = y1; \
++ weight1 = weight2 = 128; \
++ } \
++ vy += unit_y; \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
++ { \
++ src_type_t *src1, *src2; \
++ src_type_t buf1[2]; \
++ src_type_t buf2[2]; \
++ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
++ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
++ src1 = src_first_line + src_stride * y1; \
++ src2 = src_first_line + src_stride * y2; \
++ \
++ if (left_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = src1[0]; \
++ buf2[0] = buf2[1] = src2[0]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
++ dst += left_pad; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_pad; \
++ } \
++ if (width > 0) \
++ { \
++ scanline_func (dst, mask, \
++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
++ dst += width; \
++ if (have_mask && !mask_is_solid) \
++ mask += width; \
++ } \
++ if (right_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
++ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
++ } \
++ } \
++ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
++ { \
++ src_type_t *src1, *src2; \
++ src_type_t buf1[2]; \
++ src_type_t buf2[2]; \
++ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
++ if (y1 < 0) \
++ { \
++ weight1 = 0; \
++ y1 = 0; \
++ } \
++ if (y1 >= src_image->bits.height) \
++ { \
++ weight1 = 0; \
++ y1 = src_image->bits.height - 1; \
++ } \
++ if (y2 < 0) \
++ { \
++ weight2 = 0; \
++ y2 = 0; \
++ } \
++ if (y2 >= src_image->bits.height) \
++ { \
++ weight2 = 0; \
++ y2 = src_image->bits.height - 1; \
++ } \
++ src1 = src_first_line + src_stride * y1; \
++ src2 = src_first_line + src_stride * y2; \
++ \
++ if (left_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = 0; \
++ buf2[0] = buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
++ dst += left_pad; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_pad; \
++ } \
++ if (left_tz > 0) \
++ { \
++ buf1[0] = 0; \
++ buf1[1] = src1[0]; \
++ buf2[0] = 0; \
++ buf2[1] = src2[0]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_tz, weight1, weight2, \
++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
++ dst += left_tz; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_tz; \
++ vx += left_tz * unit_x; \
++ } \
++ if (width > 0) \
++ { \
++ scanline_func (dst, mask, \
++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
++ dst += width; \
++ if (have_mask && !mask_is_solid) \
++ mask += width; \
++ vx += width * unit_x; \
++ } \
++ if (right_tz > 0) \
++ { \
++ buf1[0] = src1[src_image->bits.width - 1]; \
++ buf1[1] = 0; \
++ buf2[0] = src2[src_image->bits.width - 1]; \
++ buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_tz, weight1, weight2, \
++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
++ dst += right_tz; \
++ if (have_mask && !mask_is_solid) \
++ mask += right_tz; \
++ } \
++ if (right_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = 0; \
++ buf2[0] = buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
++ } \
++ } \
++ else \
++ { \
++ scanline_func (dst, mask, src_first_line + src_stride * y1, \
++ src_first_line + src_stride * y2, width, \
++ weight1, weight2, vx, unit_x, max_vx, FALSE); \
++ } \
++ } \
++}
++
++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
++ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
++ dst_type_t, repeat_mode, have_mask, mask_is_solid)
++
++#define SCALED_BILINEAR_FLAGS \
++ (FAST_PATH_SCALE_TRANSFORM | \
++ FAST_PATH_NO_ALPHA_MAP | \
++ FAST_PATH_BILINEAR_FILTER | \
++ FAST_PATH_NO_ACCESSORS | \
++ FAST_PATH_NARROW_FORMAT)
++
++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++/* Prefer the use of 'cover' variant, because it is faster */
++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
++
+ #endif
+--
+1.6.6.1
+