diff options
Diffstat (limited to 'recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch')
-rw-r--r-- | recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch | 466 |
1 files changed, 466 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch new file mode 100644 index 0000000000..c5dab5c31f --- /dev/null +++ b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch @@ -0,0 +1,466 @@ +From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 21 Feb 2011 01:29:02 +0200 +Subject: [PATCH 07/40] Main loop template for fast single pass bilinear scaling + +Can be used for implementing SIMD optimized fast path +functions which work with bilinear scaled source images. + +Similar to the template for nearest scaling main loop, the +following types of mask are supported: +1. no mask +2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag +3. solid mask + +PAD repeat is fully supported. NONE repeat is partially +supported (right now only works if source image has alpha +channel or when alpha channel of the source image does not +have any effect on the compositing operation). +--- + pixman/pixman-fast-path.h | 432 +++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 432 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h +index d081222..1885d47 100644 +--- a/pixman/pixman-fast-path.h ++++ b/pixman/pixman-fast-path.h +@@ -587,4 +587,436 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + ++/*****************************************************************************/ ++ ++/* ++ * Identify 5 zones in each scanline for bilinear scaling. Depending on ++ * whether 2 pixels to be interpolated are fetched from the image itself, ++ * from the padding area around it or from both image and padding area. 
++ */ ++static force_inline void ++bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x, ++ int32_t * left_pad, ++ int32_t * left_tz, ++ int32_t * width, ++ int32_t * right_tz, ++ int32_t * right_pad) ++{ ++ int width1 = *width, left_pad1, right_pad1; ++ int width2 = *width, left_pad2, right_pad2; ++ ++ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, ++ &width1, &left_pad1, &right_pad1); ++ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, ++ unit_x, &width2, &left_pad2, &right_pad2); ++ ++ *left_pad = left_pad2; ++ *left_tz = left_pad1 - left_pad2; ++ *right_tz = right_pad2 - right_pad1; ++ *right_pad = right_pad1; ++ *width -= *left_pad + *left_tz + *right_tz + *right_pad; ++} ++ ++/* ++ * Main loop template for single pass bilinear scaling. It needs to be ++ * provided with 'scanline_func' which should do the compositing operation. ++ * The needed function has the following prototype: ++ * ++ * scanline_func (dst_type_t * dst, ++ * const mask_type_t * mask, ++ * const src_type_t * src_top, ++ * const src_type_t * src_bottom, ++ * int32_t width, ++ * int weight_top, ++ * int weight_bottom, ++ * pixman_fixed_t vx, ++ * pixman_fixed_t unit_x, ++ * pixman_fixed_t max_vx, ++ * pixman_bool_t zero_src) ++ * ++ * Where: ++ * dst - destination scanline buffer for storing results ++ * mask - mask buffer (or single value for solid mask) ++ * src_top, src_bottom - two source scanlines ++ * width - number of pixels to process ++ * weight_top - weight of the top row for interpolation ++ * weight_bottom - weight of the bottom row for interpolation ++ * vx - initial position for fetching the first pair of ++ * pixels from the source buffer ++ * unit_x - position increment needed to move to the next pair ++ * of pixels ++ * max_vx - image size as a fixed point value, can be used for ++ * implementing NORMAL repeat (when it is supported) ++ * zero_src - boolean hint variable, 
which is set to TRUE when ++ * all source pixels are fetched from zero padding ++ * zone for NONE repeat ++ * ++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, ++ * but sometimes it may be less than that for NONE repeat when handling ++ * fuzzy antialiased top or bottom image edges. Also both top and ++ * bottom weight variables are guaranteed to have value in 0-255 ++ * range and can fit into unsigned byte or be used with 8-bit SIMD ++ * multiplication instructions. ++ */ ++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \ ++static void \ ++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ ++ pixman_op_t op, \ ++ pixman_image_t * src_image, \ ++ pixman_image_t * mask_image, \ ++ pixman_image_t * dst_image, \ ++ int32_t src_x, \ ++ int32_t src_y, \ ++ int32_t mask_x, \ ++ int32_t mask_y, \ ++ int32_t dst_x, \ ++ int32_t dst_y, \ ++ int32_t width, \ ++ int32_t height) \ ++{ \ ++ dst_type_t *dst_line; \ ++ mask_type_t *mask_line; \ ++ src_type_t *src_first_line; \ ++ int y1, y2; \ ++ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ ++ pixman_vector_t v; \ ++ pixman_fixed_t vx, vy; \ ++ pixman_fixed_t unit_x, unit_y; \ ++ int32_t left_pad, left_tz, right_tz, right_pad; \ ++ \ ++ dst_type_t *dst; \ ++ mask_type_t solid_mask; \ ++ const mask_type_t *mask = &solid_mask; \ ++ int src_stride, mask_stride, dst_stride; \ ++ \ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ ++ if (have_mask) \ ++ { \ ++ if (mask_is_solid) \ ++ { \ ++ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \ ++ mask_stride = 0; \ ++ } \ ++ else \ ++ { \ ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ ++ mask_stride, mask_line, 1); \ ++ } \ ++ } \ ++ /* pass in 0 instead of src_x and src_y because src_x and src_y 
need to be \ ++ * transformed from destination space to source space */ \ ++ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ ++ \ ++ /* reference point is the center of the pixel */ \ ++ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ ++ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ ++ v.vector[2] = pixman_fixed_1; \ ++ \ ++ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ ++ return; \ ++ \ ++ unit_x = src_image->common.transform->matrix[0][0]; \ ++ unit_y = src_image->common.transform->matrix[1][1]; \ ++ \ ++ v.vector[0] -= pixman_fixed_1 / 2; \ ++ v.vector[1] -= pixman_fixed_1 / 2; \ ++ \ ++ vy = v.vector[1]; \ ++ \ ++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ ++ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ ++ { \ ++ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ ++ &left_pad, &left_tz, &width, &right_tz, &right_pad); \ ++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ ++ { \ ++ /* PAD repeat does not need special handling for 'transition zones' and */ \ ++ /* they can be combined with 'padding zones' safely */ \ ++ left_pad += left_tz; \ ++ right_pad += right_tz; \ ++ left_tz = right_tz = 0; \ ++ } \ ++ v.vector[0] += left_pad * unit_x; \ ++ } \ ++ \ ++ while (--height >= 0) \ ++ { \ ++ int weight1, weight2; \ ++ dst = dst_line; \ ++ dst_line += dst_stride; \ ++ vx = v.vector[0]; \ ++ if (have_mask && !mask_is_solid) \ ++ { \ ++ mask = mask_line; \ ++ mask_line += mask_stride; \ ++ } \ ++ \ ++ y1 = pixman_fixed_to_int (vy); \ ++ weight2 = (vy >> 8) & 0xff; \ ++ if (weight2) \ ++ { \ ++ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ ++ y2 = y1 + 1; \ ++ weight1 = 256 - weight2; \ ++ } \ ++ else \ ++ { \ ++ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ ++ y2 = y1; \ ++ weight1 = weight2 = 128; \ ++ } \ ++ vy += unit_y; \ ++ if 
(PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ ++ { \ ++ src_type_t *src1, *src2; \ ++ src_type_t buf1[2]; \ ++ src_type_t buf2[2]; \ ++ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ ++ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ ++ src1 = src_first_line + src_stride * y1; \ ++ src2 = src_first_line + src_stride * y2; \ ++ \ ++ if (left_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = src1[0]; \ ++ buf2[0] = buf2[1] = src2[0]; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ ++ dst += left_pad; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += left_pad; \ ++ } \ ++ if (width > 0) \ ++ { \ ++ scanline_func (dst, mask, \ ++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ ++ dst += width; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += width; \ ++ } \ ++ if (right_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ ++ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ ++ } \ ++ } \ ++ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ ++ { \ ++ src_type_t *src1, *src2; \ ++ src_type_t buf1[2]; \ ++ src_type_t buf2[2]; \ ++ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ ++ if (y1 < 0) \ ++ { \ ++ weight1 = 0; \ ++ y1 = 0; \ ++ } \ ++ if (y1 >= src_image->bits.height) \ ++ { \ ++ weight1 = 0; \ ++ y1 = src_image->bits.height - 1; \ ++ } \ ++ if (y2 < 0) \ ++ { \ ++ weight2 = 0; \ ++ y2 = 0; \ ++ } \ ++ if (y2 >= src_image->bits.height) \ ++ { \ ++ weight2 = 0; \ ++ y2 = src_image->bits.height - 1; \ ++ } \ ++ src1 = src_first_line + src_stride * y1; \ ++ src2 = src_first_line + src_stride * y2; \ ++ \ ++ if (left_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = 0; \ ++ buf2[0] = buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ ++ dst += left_pad; \ ++ if (have_mask && 
!mask_is_solid) \ ++ mask += left_pad; \ ++ } \ ++ if (left_tz > 0) \ ++ { \ ++ buf1[0] = 0; \ ++ buf1[1] = src1[0]; \ ++ buf2[0] = 0; \ ++ buf2[1] = src2[0]; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, left_tz, weight1, weight2, \ ++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ ++ dst += left_tz; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += left_tz; \ ++ vx += left_tz * unit_x; \ ++ } \ ++ if (width > 0) \ ++ { \ ++ scanline_func (dst, mask, \ ++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ ++ dst += width; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += width; \ ++ vx += width * unit_x; \ ++ } \ ++ if (right_tz > 0) \ ++ { \ ++ buf1[0] = src1[src_image->bits.width - 1]; \ ++ buf1[1] = 0; \ ++ buf2[0] = src2[src_image->bits.width - 1]; \ ++ buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_tz, weight1, weight2, \ ++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ ++ dst += right_tz; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += right_tz; \ ++ } \ ++ if (right_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = 0; \ ++ buf2[0] = buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ ++ } \ ++ } \ ++ else \ ++ { \ ++ scanline_func (dst, mask, src_first_line + src_stride * y1, \ ++ src_first_line + src_stride * y2, width, \ ++ weight1, weight2, vx, unit_x, max_vx, FALSE); \ ++ } \ ++ } \ ++} ++ ++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ ++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \ ++ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) ++ ++#define SCALED_BILINEAR_FLAGS \ ++ (FAST_PATH_SCALE_TRANSFORM | \ ++ FAST_PATH_NO_ALPHA_MAP | \ ++ FAST_PATH_BILINEAR_FILTER | \ ++ FAST_PATH_NO_ACCESSORS | \ ++ FAST_PATH_NARROW_FORMAT) ++ 
++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ 
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++/* Prefer the use of 'cover' variant, because it is faster */ ++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func) ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func) ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) ++ + #endif +-- +1.6.6.1 + |