aboutsummaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch1281
1 files changed, 1281 insertions, 0 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch
new file mode 100644
index 0000000000..3a149231f8
--- /dev/null
+++ b/toolchain-layer/recipes-devtools/gcc/gcc-4.6/linaro/gcc-4.6-linaro-r106769.patch
@@ -0,0 +1,1281 @@
+2011-07-11 Ira Rosen <ira.rosen@linaro.org>
+
+ Backport from FSF:
+ 2011-06-16 Ira Rosen <ira.rosen@linaro.org>
+
+ gcc/
+ * tree-vectorizer.h (vect_recog_func_ptr): Change the first
+ argument to be a VEC of statements.
+ * tree-vect-loop.c (vect_determine_vectorization_factor): Remove the
+ assert that pattern statements have to have their vector type set.
+ * tree-vect-patterns.c (vect_recog_widen_sum_pattern):
+ Change the first argument to be a VEC of statements. Update
+ documentation.
+ (vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise.
+ (vect_handle_widen_mult_by_const): New function.
+ (vect_recog_widen_mult_pattern): Change the first argument to be a
+ VEC of statements. Update documentation. Check that the constant is
+ INTEGER_CST. Support multiplication by a constant that fits an
+ intermediate type - call vect_handle_widen_mult_by_const.
+ (vect_pattern_recog_1): Update vect_recog_func_ptr and its
+ call. Handle additional pattern statements if necessary.
+
+ gcc/testsuite/
+ * gcc.dg/vect/vect-widen-mult-half-u8.c: New test.
+
+ and
+ 2011-06-30 Ira Rosen <ira.rosen@linaro.org>
+
+ gcc/
+ * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
+ both pattern and original statements if necessary.
+ (vect_transform_loop): Likewise.
+ * tree-vect-patterns.c (vect_pattern_recog): Update documentation.
+ * tree-vect-stmts.c (vect_mark_relevant): Add new argument.
+ Mark the pattern statement only if the original statement doesn't
+ have its own uses.
+ (process_use): Call vect_mark_relevant with additional parameter.
+ (vect_mark_stmts_to_be_vectorized): Likewise.
+ (vect_get_vec_def_for_operand): Use vectorized pattern statement.
+ (vect_analyze_stmt): Handle both pattern and original statements
+ if necessary.
+ (vect_transform_stmt): Don't store vectorized pattern statement
+ in the original statement.
+ (vect_is_simple_use_1): Use related pattern statement only if the
+ original statement is irrelevant.
+ * tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise.
+
+ gcc/testsuite/
+ * gcc.dg/vect/slp-widen-mult-half.c: New test.
+ * gcc.dg/vect/vect-widen-mult-half.c: New test.
+
+=== added file 'gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c'
+Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2012-01-09 15:03:29.156918805 -0800
+@@ -0,0 +1,52 @@
++/* { dg-require-effective-target vect_int } */
++
++#include "tree-vect.h"
++#include <stdlib.h>
++
++#define N 32
++#define COEF 32470
++#define COEF2 324700
++
++unsigned char in[N];
++int out[N];
++int out2[N];
++
++__attribute__ ((noinline)) void
++foo ()
++{
++ int i;
++
++ for (i = 0; i < N/2; i++)
++ {
++ out[2*i] = in[2*i] * COEF;
++ out2[2*i] = in[2*i] + COEF2;
++ out[2*i+1] = in[2*i+1] * COEF;
++ out2[2*i+1] = in[2*i+1] + COEF2;
++ }
++}
++
++int main (void)
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ {
++ in[i] = i;
++ __asm__ volatile ("");
++ }
++
++ foo ();
++
++ for (i = 0; i < N; i++)
++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
++ abort ();
++
++ return 0;
++}
++
++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { cleanup-tree-dump "vect" } } */
++
+Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c 2012-01-09 15:03:29.156918805 -0800
+@@ -0,0 +1,59 @@
++/* { dg-require-effective-target vect_int } */
++
++#include "tree-vect.h"
++#include <stdlib.h>
++
++#define N 32
++#define COEF 32470
++
++unsigned char in[N];
++int out[N];
++
++__attribute__ ((noinline)) void
++foo ()
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ out[i] = in[i] * COEF;
++}
++
++__attribute__ ((noinline)) void
++bar ()
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ out[i] = COEF * in[i];
++}
++
++int main (void)
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ {
++ in[i] = i;
++ __asm__ volatile ("");
++ }
++
++ foo ();
++
++ for (i = 0; i < N; i++)
++ if (out[i] != in[i] * COEF)
++ abort ();
++
++ bar ();
++
++ for (i = 0; i < N; i++)
++ if (out[i] != in[i] * COEF)
++ abort ();
++
++ return 0;
++}
++
++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { cleanup-tree-dump "vect" } } */
++
+Index: gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ gcc-4_6-branch/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c 2012-01-09 15:03:29.160918806 -0800
+@@ -0,0 +1,49 @@
++/* { dg-require-effective-target vect_int } */
++
++#include "tree-vect.h"
++#include <stdlib.h>
++
++#define N 32
++#define COEF 32470
++#define COEF2 324700
++
++unsigned char in[N];
++int out[N];
++int out2[N];
++
++__attribute__ ((noinline)) void
++foo (int a)
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ {
++ out[i] = in[i] * COEF;
++ out2[i] = in[i] + a;
++ }
++}
++
++int main (void)
++{
++ int i;
++
++ for (i = 0; i < N; i++)
++ {
++ in[i] = i;
++ __asm__ volatile ("");
++ }
++
++ foo (COEF2);
++
++ for (i = 0; i < N; i++)
++ if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
++ abort ();
++
++ return 0;
++}
++
++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
++/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
++/* { dg-final { cleanup-tree-dump "vect" } } */
++
+Index: gcc-4_6-branch/gcc/tree-vect-loop.c
+===================================================================
+--- gcc-4_6-branch.orig/gcc/tree-vect-loop.c 2012-01-09 15:02:15.000000000 -0800
++++ gcc-4_6-branch/gcc/tree-vect-loop.c 2012-01-09 15:03:29.160918806 -0800
+@@ -181,6 +181,8 @@
+ stmt_vec_info stmt_info;
+ int i;
+ HOST_WIDE_INT dummy;
++ gimple stmt, pattern_stmt = NULL;
++ bool analyze_pattern_stmt = false;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
+@@ -241,12 +243,20 @@
+ }
+ }
+
+- for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
+ {
+- tree vf_vectype;
+- gimple stmt = gsi_stmt (si), pattern_stmt;
+- stmt_info = vinfo_for_stmt (stmt);
++ tree vf_vectype;
++
++ if (analyze_pattern_stmt)
++ {
++ stmt = pattern_stmt;
++ analyze_pattern_stmt = false;
++ }
++ else
++ stmt = gsi_stmt (si);
+
++ stmt_info = vinfo_for_stmt (stmt);
++
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "==> examining statement: ");
+@@ -276,10 +286,17 @@
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "skip.");
++ gsi_next (&si);
+ continue;
+ }
+ }
+
++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
++ analyze_pattern_stmt = true;
++
+ if (gimple_get_lhs (stmt) == NULL_TREE)
+ {
+ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+@@ -311,9 +328,7 @@
+ }
+ else
+ {
+- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
+- && !is_pattern_stmt_p (stmt_info));
+-
++ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
+ scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+@@ -385,6 +400,9 @@
+ if (!vectorization_factor
+ || (nunits > vectorization_factor))
+ vectorization_factor = nunits;
++
++ if (!analyze_pattern_stmt)
++ gsi_next (&si);
+ }
+ }
+
+@@ -4740,6 +4758,8 @@
+ tree cond_expr = NULL_TREE;
+ gimple_seq cond_expr_stmt_list = NULL;
+ bool do_peeling_for_loop_bound;
++ gimple stmt, pattern_stmt;
++ bool transform_pattern_stmt = false;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vec_transform_loop ===");
+@@ -4827,11 +4847,19 @@
+ }
+ }
+
+- for (si = gsi_start_bb (bb); !gsi_end_p (si);)
++ pattern_stmt = NULL;
++ for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
+ {
+- gimple stmt = gsi_stmt (si), pattern_stmt;
+ bool is_store;
+
++ if (transform_pattern_stmt)
++ {
++ stmt = pattern_stmt;
++ transform_pattern_stmt = false;
++ }
++ else
++ stmt = gsi_stmt (si);
++
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "------>vectorizing statement: ");
+@@ -4869,6 +4897,11 @@
+ continue;
+ }
+ }
++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
++ && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
++ transform_pattern_stmt = true;
+
+ gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
+ nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
+@@ -4897,8 +4930,9 @@
+ /* Hybrid SLP stmts must be vectorized in addition to SLP. */
+ if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
+ {
+- gsi_next (&si);
+- continue;
++ if (!transform_pattern_stmt)
++ gsi_next (&si);
++ continue;
+ }
+ }
+
+@@ -4917,7 +4951,7 @@
+ the chain. */
+ vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
+ gsi_remove (&si, true);
+- continue;
++ continue;
+ }
+ else
+ {
+@@ -4927,7 +4961,9 @@
+ continue;
+ }
+ }
+- gsi_next (&si);
++
++ if (!transform_pattern_stmt)
++ gsi_next (&si);
+ } /* stmts in BB */
+ } /* BBs in loop */
+
+Index: gcc-4_6-branch/gcc/tree-vect-patterns.c
+===================================================================
+--- gcc-4_6-branch.orig/gcc/tree-vect-patterns.c 2012-01-09 15:02:15.000000000 -0800
++++ gcc-4_6-branch/gcc/tree-vect-patterns.c 2012-01-09 15:03:29.160918806 -0800
+@@ -39,10 +39,13 @@
+ #include "diagnostic-core.h"
+
+ /* Pattern recognition functions */
+-static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
+-static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
+-static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
+-static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
++static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
++ tree *);
++static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
++ tree *);
++static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
++ tree *);
++static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
+ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
+ vect_recog_widen_mult_pattern,
+ vect_recog_widen_sum_pattern,
+@@ -142,9 +145,9 @@
+
+ Input:
+
+- * LAST_STMT: A stmt from which the pattern search begins. In the example,
+- when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
+- detected.
++ * STMTS: Contains a stmt from which the pattern search begins. In the
++ example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
++ will be detected.
+
+ Output:
+
+@@ -165,12 +168,13 @@
+ inner-loop nested in an outer-loop that us being vectorized). */
+
+ static gimple
+-vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
++vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
++ tree *type_out)
+ {
+- gimple stmt;
++ gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
+ tree oprnd0, oprnd1;
+ tree oprnd00, oprnd01;
+- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ tree type, half_type;
+ gimple pattern_stmt;
+ tree prod_type;
+@@ -178,10 +182,10 @@
+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+ tree var, rhs;
+
+- if (!is_gimple_assign (*last_stmt))
++ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+- type = gimple_expr_type (*last_stmt);
++ type = gimple_expr_type (last_stmt);
+
+ /* Look for the following pattern
+ DX = (TYPE1) X;
+@@ -207,7 +211,7 @@
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ return NULL;
+
+ if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+@@ -228,12 +232,12 @@
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
+ return NULL;
+- oprnd0 = gimple_assign_rhs1 (*last_stmt);
+- oprnd1 = gimple_assign_rhs2 (*last_stmt);
++ oprnd0 = gimple_assign_rhs1 (last_stmt);
++ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (oprnd0), type)
+ || !types_compatible_p (TREE_TYPE (oprnd1), type))
+ return NULL;
+- stmt = *last_stmt;
++ stmt = last_stmt;
+
+ if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
+ {
+@@ -319,11 +323,79 @@
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+
+ return pattern_stmt;
+ }
+
++/* Handle two cases of multiplication by a constant. The first one is when
++ the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
++ operand (OPRND). In that case, we can perform widen-mult from HALF_TYPE to
++ TYPE.
++
++ Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
++ HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
++ TYPE), we can perform widen-mult from the intermediate type to TYPE and
++ replace a_T = (TYPE) a_t; with a_it = (interm_type) a_t; */
++
++static bool
++vect_handle_widen_mult_by_const (tree const_oprnd, tree *oprnd,
++ VEC (gimple, heap) **stmts, tree type,
++ tree *half_type, gimple def_stmt)
++{
++ tree new_type, new_oprnd, tmp;
++ gimple new_stmt;
++
++ if (int_fits_type_p (const_oprnd, *half_type))
++ {
++ /* CONST_OPRND is a constant of HALF_TYPE. */
++ *oprnd = gimple_assign_rhs1 (def_stmt);
++ return true;
++ }
++
++ if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
++ || !vinfo_for_stmt (def_stmt))
++ return false;
++
++ /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
++ a type 2 times bigger than HALF_TYPE. */
++ new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
++ TYPE_UNSIGNED (type));
++ if (!int_fits_type_p (const_oprnd, new_type))
++ return false;
++
++ /* Use NEW_TYPE for widen_mult. */
++ if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
++ {
++ new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
++ /* Check if the already created pattern stmt is what we need. */
++ if (!is_gimple_assign (new_stmt)
++ || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
++ || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
++ return false;
++
++ *oprnd = gimple_assign_lhs (new_stmt);
++ }
++ else
++ {
++ /* Create a_T = (NEW_TYPE) a_t; */
++ *oprnd = gimple_assign_rhs1 (def_stmt);
++ tmp = create_tmp_var (new_type, NULL);
++ add_referenced_var (tmp);
++ new_oprnd = make_ssa_name (tmp, NULL);
++ new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
++ NULL_TREE);
++ SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
++ STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
++ VEC_safe_push (gimple, heap, *stmts, def_stmt);
++ *oprnd = new_oprnd;
++ }
++
++ *half_type = new_type;
++ return true;
++}
++
++
+ /* Function vect_recog_widen_mult_pattern
+
+ Try to find the following pattern:
+@@ -361,28 +433,47 @@
+ S3 a_T = (TYPE) a_t;
+ S5 prod_T = a_T * CONST;
+
+- Input:
++ A special case of multiplication by constants is when 'TYPE' is 4 times
++ bigger than 'type', but CONST fits an intermediate type 2 times smaller
++ than 'TYPE'. In that case we create an additional pattern stmt for S3
++ to create a variable of the intermediate type, and perform widen-mult
++ on the intermediate type as well:
++
++ type a_t;
++ interm_type a_it;
++ TYPE a_T, prod_T, prod_T';
++
++ S1 a_t = ;
++ S3 a_T = (TYPE) a_t;
++ '--> a_it = (interm_type) a_t;
++ S5 prod_T = a_T * CONST;
++ '--> prod_T' = a_it w* CONST;
++
++ Input/Output:
+
+- * LAST_STMT: A stmt from which the pattern search begins. In the example,
+- when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
+- detected.
++ * STMTS: Contains a stmt from which the pattern search begins. In the
++ example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
++ is detected. In case of unsigned widen-mult, the original stmt (S5) is
++ replaced with S6 in STMTS. In case of multiplication by a constant
++ of an intermediate type (the last case above), STMTS also contains S3
++ (inserted before S5).
+
+- Output:
++ Output:
+
+- * TYPE_IN: The type of the input arguments to the pattern.
++ * TYPE_IN: The type of the input arguments to the pattern.
+
+- * TYPE_OUT: The type of the output of this pattern.
++ * TYPE_OUT: The type of the output of this pattern.
+
+- * Return value: A new stmt that will be used to replace the sequence of
+- stmts that constitute the pattern. In this case it will be:
+- WIDEN_MULT <a_t, b_t>
+- */
++ * Return value: A new stmt that will be used to replace the sequence of
++ stmts that constitute the pattern. In this case it will be:
++ WIDEN_MULT <a_t, b_t>
++*/
+
+ static gimple
+-vect_recog_widen_mult_pattern (gimple *last_stmt,
+- tree *type_in,
+- tree *type_out)
++vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
++ tree *type_in, tree *type_out)
+ {
++ gimple last_stmt = VEC_pop (gimple, *stmts);
+ gimple def_stmt0, def_stmt1;
+ tree oprnd0, oprnd1;
+ tree type, half_type0, half_type1;
+@@ -395,27 +486,27 @@
+ VEC (tree, heap) *dummy_vec;
+ bool op0_ok, op1_ok;
+
+- if (!is_gimple_assign (*last_stmt))
++ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+- type = gimple_expr_type (*last_stmt);
++ type = gimple_expr_type (last_stmt);
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+- if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
++ if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+ return NULL;
+
+- oprnd0 = gimple_assign_rhs1 (*last_stmt);
+- oprnd1 = gimple_assign_rhs2 (*last_stmt);
++ oprnd0 = gimple_assign_rhs1 (last_stmt);
++ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (oprnd0), type)
+ || !types_compatible_p (TREE_TYPE (oprnd1), type))
+ return NULL;
+
+ /* Check argument 0. */
+- op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
++ op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
+ /* Check argument 1. */
+- op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
++ op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
+
+ /* In case of multiplication by a constant one of the operands may not match
+ the pattern, but not both. */
+@@ -429,29 +520,21 @@
+ }
+ else if (!op0_ok)
+ {
+- if (CONSTANT_CLASS_P (oprnd0)
+- && TREE_CODE (half_type1) == INTEGER_TYPE
+- && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
+- && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
+- {
+- /* OPRND0 is a constant of HALF_TYPE1. */
+- half_type0 = half_type1;
+- oprnd1 = gimple_assign_rhs1 (def_stmt1);
+- }
++ if (TREE_CODE (oprnd0) == INTEGER_CST
++ && TREE_CODE (half_type1) == INTEGER_TYPE
++ && vect_handle_widen_mult_by_const (oprnd0, &oprnd1, stmts, type,
++ &half_type1, def_stmt1))
++ half_type0 = half_type1;
+ else
+ return NULL;
+ }
+ else if (!op1_ok)
+ {
+- if (CONSTANT_CLASS_P (oprnd1)
++ if (TREE_CODE (oprnd1) == INTEGER_CST
+ && TREE_CODE (half_type0) == INTEGER_TYPE
+- && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
+- && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
+- {
+- /* OPRND1 is a constant of HALF_TYPE0. */
+- half_type1 = half_type0;
+- oprnd0 = gimple_assign_rhs1 (def_stmt0);
+- }
++ && vect_handle_widen_mult_by_const (oprnd1, &oprnd0, stmts, type,
++ &half_type0, def_stmt0))
++ half_type1 = half_type0;
+ else
+ return NULL;
+ }
+@@ -461,7 +544,7 @@
+ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
+ {
+- tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
++ tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ int nuses = 0;
+@@ -491,7 +574,7 @@
+ return NULL;
+
+ type = use_type;
+- *last_stmt = use_stmt;
++ last_stmt = use_stmt;
+ }
+
+ if (!types_compatible_p (half_type0, half_type1))
+@@ -506,7 +589,7 @@
+ vectype_out = get_vectype_for_scalar_type (type);
+ if (!vectype
+ || !vectype_out
+- || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
++ || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
+ vectype_out, vectype,
+ &dummy, &dummy, &dummy_code,
+ &dummy_code, &dummy_int, &dummy_vec))
+@@ -524,6 +607,7 @@
+ if (vect_print_dump_info (REPORT_DETAILS))
+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+
++ VEC_safe_push (gimple, heap, *stmts, last_stmt);
+ return pattern_stmt;
+ }
+
+@@ -555,16 +639,17 @@
+ */
+
+ static gimple
+-vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
++vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out)
+ {
++ gimple last_stmt = VEC_index (gimple, *stmts, 0);
+ tree fn, base, exp = NULL;
+ gimple stmt;
+ tree var;
+
+- if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
++ if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
+ return NULL;
+
+- fn = gimple_call_fndecl (*last_stmt);
++ fn = gimple_call_fndecl (last_stmt);
+ if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
+ return NULL;
+
+@@ -574,8 +659,8 @@
+ case BUILT_IN_POWI:
+ case BUILT_IN_POWF:
+ case BUILT_IN_POW:
+- base = gimple_call_arg (*last_stmt, 0);
+- exp = gimple_call_arg (*last_stmt, 1);
++ base = gimple_call_arg (last_stmt, 0);
++ exp = gimple_call_arg (last_stmt, 1);
+ if (TREE_CODE (exp) != REAL_CST
+ && TREE_CODE (exp) != INTEGER_CST)
+ return NULL;
+@@ -667,21 +752,23 @@
+ inner-loop nested in an outer-loop that us being vectorized). */
+
+ static gimple
+-vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
++vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
++ tree *type_out)
+ {
++ gimple last_stmt = VEC_index (gimple, *stmts, 0);
+ gimple stmt;
+ tree oprnd0, oprnd1;
+- stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
++ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ tree type, half_type;
+ gimple pattern_stmt;
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+ tree var;
+
+- if (!is_gimple_assign (*last_stmt))
++ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+- type = gimple_expr_type (*last_stmt);
++ type = gimple_expr_type (last_stmt);
+
+ /* Look for the following pattern
+ DX = (TYPE) X;
+@@ -693,25 +780,25 @@
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+- if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
++ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ return NULL;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
+ return NULL;
+
+- oprnd0 = gimple_assign_rhs1 (*last_stmt);
+- oprnd1 = gimple_assign_rhs2 (*last_stmt);
++ oprnd0 = gimple_assign_rhs1 (last_stmt);
++ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (oprnd0), type)
+ || !types_compatible_p (TREE_TYPE (oprnd1), type))
+ return NULL;
+
+- /* So far so good. Since *last_stmt was detected as a (summation) reduction,
++ /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ we know that oprnd1 is the reduction variable (defined by a loop-header
+ phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
+ Left to check that oprnd0 is defined by a cast from type 'type' to type
+ 'TYPE'. */
+
+- if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
++ if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
+ return NULL;
+
+ oprnd0 = gimple_assign_rhs1 (stmt);
+@@ -732,8 +819,9 @@
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+- gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
++ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+
++ VEC_safe_push (gimple, heap, *stmts, last_stmt);
+ return pattern_stmt;
+ }
+
+@@ -762,7 +850,7 @@
+
+ static void
+ vect_pattern_recog_1 (
+- gimple (* vect_recog_func) (gimple *, tree *, tree *),
++ gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
+ gimple_stmt_iterator si)
+ {
+ gimple stmt = gsi_stmt (si), pattern_stmt;
+@@ -774,12 +862,14 @@
+ enum tree_code code;
+ int i;
+ gimple next;
++ VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
+
+- pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
++ VEC_quick_push (gimple, stmts_to_replace, stmt);
++ pattern_stmt = (* vect_recog_func) (&stmts_to_replace, &type_in, &type_out);
+ if (!pattern_stmt)
+ return;
+
+- si = gsi_for_stmt (stmt);
++ stmt = VEC_last (gimple, stmts_to_replace);
+ stmt_info = vinfo_for_stmt (stmt);
+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+
+@@ -849,6 +939,35 @@
+ FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
+ if (next == stmt)
+ VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
++
++ /* In case of widen-mult by a constant, it is possible that an additional
++ pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a
++ stmt_info for it, and mark the relevant statements. */
++ for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
++ && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
++ i++)
++ {
++ stmt_info = vinfo_for_stmt (stmt);
++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
++ if (vect_print_dump_info (REPORT_DETAILS))
++ {
++ fprintf (vect_dump, "additional pattern stmt: ");
++ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
++ }
++
++ set_vinfo_for_stmt (pattern_stmt,
++ new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
++ gimple_set_bb (pattern_stmt, gimple_bb (stmt));
++ pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
++
++ STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
++ STMT_VINFO_DEF_TYPE (pattern_stmt_info)
++ = STMT_VINFO_DEF_TYPE (stmt_info);
++ STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info);
++ STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
++ }
++
++ VEC_free (gimple, heap, stmts_to_replace);
+ }
+
+
+@@ -896,10 +1015,8 @@
+
+ If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
+ (because they are marked as irrelevant). It will vectorize S6, and record
+- a pointer to the new vector stmt VS6 both from S6 (as usual), and also
+- from S4. We do that so that when we get to vectorizing stmts that use the
+- def of S4 (like S5 that uses a_0), we'll know where to take the relevant
+- vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
++ a pointer to the new vector stmt VS6 from S6 (as usual).
++ S4 will be skipped, and S5 will be vectorized as usual:
+
+ in_pattern_p related_stmt vec_stmt
+ S1: a_i = .... - - -
+@@ -915,7 +1032,21 @@
+ elsewhere), and we'll end up with:
+
+ VS6: va_new = ....
+- VS5: ... = ..vuse(va_new).. */
++ VS5: ... = ..vuse(va_new)..
++
++ In case of more than one pattern statements, e.g., widen-mult with
++ intermediate type:
++
++ S1 a_t = ;
++ S2 a_T = (TYPE) a_t;
++ '--> S3: a_it = (interm_type) a_t;
++ S4 prod_T = a_T * CONST;
++ '--> S5: prod_T' = a_it w* CONST;
++
++ there may be other users of a_T outside the pattern. In that case S2 will
++ be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
++ and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
++ be recorded in S3. */
+
+ void
+ vect_pattern_recog (loop_vec_info loop_vinfo)
+@@ -925,7 +1056,7 @@
+ unsigned int nbbs = loop->num_nodes;
+ gimple_stmt_iterator si;
+ unsigned int i, j;
+- gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
++ gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vect_pattern_recog ===");
+Index: gcc-4_6-branch/gcc/tree-vect-slp.c
+===================================================================
+--- gcc-4_6-branch.orig/gcc/tree-vect-slp.c 2012-01-09 15:02:15.000000000 -0800
++++ gcc-4_6-branch/gcc/tree-vect-slp.c 2012-01-09 15:03:29.160918806 -0800
+@@ -152,7 +152,9 @@
+ if (loop && def_stmt && gimple_bb (def_stmt)
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+ && vinfo_for_stmt (def_stmt)
+- && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
++ && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
++ && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
+ {
+ if (!*first_stmt_dt0)
+ *pattern0 = true;
+Index: gcc-4_6-branch/gcc/tree-vect-stmts.c
+===================================================================
+--- gcc-4_6-branch.orig/gcc/tree-vect-stmts.c 2012-01-09 15:02:15.000000000 -0800
++++ gcc-4_6-branch/gcc/tree-vect-stmts.c 2012-01-09 15:06:23.636927250 -0800
+@@ -126,33 +126,72 @@
+
+ static void
+ vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
+- enum vect_relevant relevant, bool live_p)
++ enum vect_relevant relevant, bool live_p,
++ bool used_in_pattern)
+ {
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
+ bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
++ gimple pattern_stmt;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
+
++ /* If this stmt is an original stmt in a pattern, we might need to mark its
++ related pattern stmt instead of the original stmt. However, such stmts
++ may have their own uses that are not in any pattern, in such cases the
++ stmt itself should be marked. */
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+ {
+- gimple pattern_stmt;
++ bool found = false;
++ if (!used_in_pattern)
++ {
++ imm_use_iterator imm_iter;
++ use_operand_p use_p;
++ gimple use_stmt;
++ tree lhs;
++
++ if (is_gimple_assign (stmt))
++ lhs = gimple_assign_lhs (stmt);
++ else
++ lhs = gimple_call_lhs (stmt);
++
++ /* This use is out of pattern use, if LHS has other uses that are
++ pattern uses, we should mark the stmt itself, and not the pattern
++ stmt. */
++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
++ {
++ if (is_gimple_debug (USE_STMT (use_p)))
++ continue;
++ use_stmt = USE_STMT (use_p);
++
++ if (vinfo_for_stmt (use_stmt)
++ && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
++ {
++ found = true;
++ break;
++ }
++ }
++ }
++
++ if (!found)
++ {
++ /* This is the last stmt in a sequence that was detected as a
++ pattern that can potentially be vectorized. Don't mark the stmt
++ as relevant/live because it's not going to be vectorized.
++ Instead mark the pattern-stmt that replaces it. */
++
++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+
+- /* This is the last stmt in a sequence that was detected as a
+- pattern that can potentially be vectorized. Don't mark the stmt
+- as relevant/live because it's not going to be vectorized.
+- Instead mark the pattern-stmt that replaces it. */
+-
+- pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+-
+- if (vect_print_dump_info (REPORT_DETAILS))
+- fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
+- stmt_info = vinfo_for_stmt (pattern_stmt);
+- gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
+- save_relevant = STMT_VINFO_RELEVANT (stmt_info);
+- save_live_p = STMT_VINFO_LIVE_P (stmt_info);
+- stmt = pattern_stmt;
++ if (vect_print_dump_info (REPORT_DETAILS))
++ fprintf (vect_dump, "last stmt in pattern. don't mark"
++ " relevant/live.");
++ stmt_info = vinfo_for_stmt (pattern_stmt);
++ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
++ save_relevant = STMT_VINFO_RELEVANT (stmt_info);
++ save_live_p = STMT_VINFO_LIVE_P (stmt_info);
++ stmt = pattern_stmt;
++ }
+ }
+
+ STMT_VINFO_LIVE_P (stmt_info) |= live_p;
+@@ -437,7 +476,8 @@
+ }
+ }
+
+- vect_mark_relevant (worklist, def_stmt, relevant, live_p);
++ vect_mark_relevant (worklist, def_stmt, relevant, live_p,
++ is_pattern_stmt_p (stmt_vinfo));
+ return true;
+ }
+
+@@ -494,7 +534,7 @@
+ }
+
+ if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
+- vect_mark_relevant (&worklist, phi, relevant, live_p);
++ vect_mark_relevant (&worklist, phi, relevant, live_p, false);
+ }
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+@@ -506,7 +546,7 @@
+ }
+
+ if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
+- vect_mark_relevant (&worklist, stmt, relevant, live_p);
++ vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
+ }
+ }
+
+@@ -613,42 +653,55 @@
+ if (is_gimple_assign (stmt))
+ {
+ tree rhs = gimple_assign_rhs1 (stmt);
+- if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+- == GIMPLE_SINGLE_RHS)
++ unsigned int op_num;
++ tree op;
++ switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
+ {
+- unsigned int op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1
+- (stmt));
+- for (i = 0; i < op_num; i++)
+- {
+- tree op = TREE_OPERAND (rhs, i);
+- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+- &worklist))
++ case GIMPLE_SINGLE_RHS:
++ op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
++ for (i = 0; i < op_num; i++)
+ {
+- VEC_free (gimple, heap, worklist);
+- return false;
++ op = TREE_OPERAND (rhs, i);
++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
++ &worklist))
++ {
++ VEC_free (gimple, heap, worklist);
++ return false;
++ }
+ }
+- }
+- }
+- else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+- == GIMPLE_BINARY_RHS)
+- {
+- tree op = gimple_assign_rhs1 (stmt);
+- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+- &worklist))
+- {
+- VEC_free (gimple, heap, worklist);
+- return false;
+- }
+- op = gimple_assign_rhs2 (stmt);
+- if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+- &worklist))
+- {
+- VEC_free (gimple, heap, worklist);
+- return false;
+- }
++ break;
++
++ case GIMPLE_BINARY_RHS:
++ op = gimple_assign_rhs1 (stmt);
++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
++ &worklist))
++ {
++ VEC_free (gimple, heap, worklist);
++ return false;
++ }
++ op = gimple_assign_rhs2 (stmt);
++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
++ &worklist))
++ {
++ VEC_free (gimple, heap, worklist);
++ return false;
++ }
++ break;
++
++ case GIMPLE_UNARY_RHS:
++ op = gimple_assign_rhs1 (stmt);
++ if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
++ &worklist))
++ {
++ VEC_free (gimple, heap, worklist);
++ return false;
++ }
++
++ break;
++
++ default:
++ return false;
+ }
+- else
+- return false;
+ }
+ else if (is_gimple_call (stmt))
+ {
+@@ -1210,7 +1263,14 @@
+
+ /* Get the def from the vectorized stmt. */
+ def_stmt_info = vinfo_for_stmt (def_stmt);
++
+ vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
++ /* Get vectorized pattern statement. */
++ if (!vec_stmt
++ && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
++ && !STMT_VINFO_RELEVANT (def_stmt_info))
++ vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
++ STMT_VINFO_RELATED_STMT (def_stmt_info)));
+ gcc_assert (vec_stmt);
+ if (gimple_code (vec_stmt) == GIMPLE_PHI)
+ vec_oprnd = PHI_RESULT (vec_stmt);
+@@ -4894,6 +4954,7 @@
+ enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
+ bool ok;
+ tree scalar_type, vectype;
++ gimple pattern_stmt;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+@@ -4915,16 +4976,22 @@
+ - any LABEL_EXPRs in the loop
+ - computations that are used only for array indexing or loop control.
+ In basic blocks we only analyze statements that are a part of some SLP
+- instance, therefore, all the statements are relevant. */
++ instance, therefore, all the statements are relevant.
++
++ Pattern statement need to be analyzed instead of the original statement
++ if the original statement is not relevant. Otherwise, we analyze both
++ statements. */
+
++ pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ {
+- gimple pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
++ && pattern_stmt
+ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+ {
++ /* Analyze PATTERN_STMT instead of the original stmt. */
+ stmt = pattern_stmt;
+ stmt_info = vinfo_for_stmt (pattern_stmt);
+ if (vect_print_dump_info (REPORT_DETAILS))
+@@ -4941,6 +5008,21 @@
+ return true;
+ }
+ }
++ else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
++ && pattern_stmt
++ && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
++ || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
++ {
++ /* Analyze PATTERN_STMT too. */
++ if (vect_print_dump_info (REPORT_DETAILS))
++ {
++ fprintf (vect_dump, "==> examining pattern statement: ");
++ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
++ }
++
++ if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
++ return false;
++ }
+
+ switch (STMT_VINFO_DEF_TYPE (stmt_info))
+ {
+@@ -5074,7 +5156,6 @@
+ bool is_store = false;
+ gimple vec_stmt = NULL;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+- gimple orig_stmt_in_pattern;
+ bool done;
+
+ switch (STMT_VINFO_TYPE (stmt_info))
+@@ -5213,21 +5294,7 @@
+ }
+
+ if (vec_stmt)
+- {
+ STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+- orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
+- if (orig_stmt_in_pattern)
+- {
+- stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
+- /* STMT was inserted by the vectorizer to replace a computation idiom.
+- ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
+- computed this idiom. We need to record a pointer to VEC_STMT in
+- the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
+- documentation of vect_pattern_recog. */
+- if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+- STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
+- }
+- }
+
+ return is_store;
+ }
+@@ -5605,8 +5672,12 @@
+ || *dt == vect_nested_cycle)
+ {
+ stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
+- if (STMT_VINFO_IN_PATTERN_P (stmt_info))
++
++ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
++ && !STMT_VINFO_RELEVANT (stmt_info)
++ && !STMT_VINFO_LIVE_P (stmt_info))
+ stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
++
+ *vectype = STMT_VINFO_VECTYPE (stmt_info);
+ gcc_assert (*vectype != NULL_TREE);
+ }
+Index: gcc-4_6-branch/gcc/tree-vectorizer.h
+===================================================================
+--- gcc-4_6-branch.orig/gcc/tree-vectorizer.h 2012-01-09 15:02:15.000000000 -0800
++++ gcc-4_6-branch/gcc/tree-vectorizer.h 2012-01-09 15:03:29.164918806 -0800
+@@ -890,7 +890,7 @@
+ /* Pattern recognition functions.
+ Additional pattern recognition functions can (and will) be added
+ in the future. */
+-typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
++typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
+ #define NUM_PATTERNS 4
+ void vect_pattern_recog (loop_vec_info);
+