From: Mans Rullgard
Date: Sun, 3 Aug 2008 17:13:06 +0000 (+0100)
Subject: ARM: NEON optimised vector_fmul
X-Git-Url: http://git.mansr.com/?p=ffmpeg.mru;a=commitdiff_plain;h=ba46eb14e3be96b627fd096aacaa4dbb2e186281

ARM: NEON optimised vector_fmul
---

diff --git a/libavcodec/armv4l/dsputil_neon.c b/libavcodec/armv4l/dsputil_neon.c
index 6c44940..c6fc173 100644
--- a/libavcodec/armv4l/dsputil_neon.c
+++ b/libavcodec/armv4l/dsputil_neon.c
@@ -91,6 +91,7 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
 void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
 
+void ff_vector_fmul_neon(float *dst, const float *src, int len); /* implemented in dsputil_neon_s.S; multiplies dst by src element-wise, in place */
 void ff_vector_fmul_window_neon(float *dst, const float *src0,
                                 const float *src1, const float *win,
                                 float add_bias, int len);
@@ -168,6 +169,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
     c->h264_idct_add = ff_h264_idct_add_neon;
     c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
 
+    c->vector_fmul = ff_vector_fmul_neon; /* install the NEON routine as the DSPContext vector_fmul hook */
     c->vector_fmul_window = ff_vector_fmul_window_neon;
 
     c->float_to_int16 = ff_float_to_int16_neon;
diff --git a/libavcodec/armv4l/dsputil_neon_s.S b/libavcodec/armv4l/dsputil_neon_s.S
index 49a09b8..7310700 100644
--- a/libavcodec/armv4l/dsputil_neon_s.S
+++ b/libavcodec/armv4l/dsputil_neon_s.S
@@ -324,6 +324,23 @@ extern ff_float_to_int16_interleave_neon
         pop             {r4,r5,pc}
         .endfunc
 
+extern ff_vector_fmul_neon /* in-place element-wise float multiply; r0/r1/r2 presumably carry dst/src/len as in the C prototype (AAPCS order) - confirm */
+        mov             r3, r0 /* r3 = store pointer; r0 keeps running one group ahead as the load pointer */
+        vld1.64         {d0-d3}, [r0,:128]! /* preload q0-q1 with 8 floats from [r0], post-increment; :128 is a 16-byte alignment qualifier */
+        vld1.64         {d4-d7}, [r1,:128]! /* preload q2-q3 with 8 floats from [r1], post-increment */
+        dmb /* NOTE(review): data memory barrier; its purpose is not evident from this patch - confirm it is required */
+1:      subs            r2, r2, #8 /* 8 elements consumed per iteration; the Z flag set here is tested by beq below */
+        vmul.f32        q8, q0, q2 /* q8 = q0 * q2 (4 floats) */
+        vmul.f32        q9, q1, q3 /* q9 = q1 * q3 (4 floats) */
+        beq             2f /* count reached exactly 0: last group, skip the preload; the loop only exits here, so len must be a positive multiple of 8 */
+        vld1.64         {d0-d3}, [r0,:128]! /* preload the next 8 floats from [r0] before storing the current products */
+        vld1.64         {d4-d7}, [r1,:128]! /* preload the next 8 floats from [r1] */
+        vst1.64         {d16-d19}, [r3,:128]! /* store the 8 products (q8-q9) to [r3], post-increment */
+        b               1b
+2:      vst1.64         {d16-d19}, [r3,:128]! /* store the final 8 products */
+        bx              lr /* return to caller */
+        .endfunc
+
 extern ff_vector_fmul_window_neon
         vld1.32         {d16[],d17[]}, [sp,:32]
         push            {r4,r5,lr}