aboutsummaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch784
1 files changed, 0 insertions, 784 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch
deleted file mode 100644
index bb866ce8d9..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99495.patch
+++ /dev/null
@@ -1,784 +0,0 @@
-2011-03-24 Revital Eres <revital.eres@linaro.org>
-
- gcc/
- * loop-doloop.c (doloop_condition_get): Support new form of
- doloop pattern and use prev_nondebug_insn instead of PREV_INSN.
- * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*".
- (doloop_end): New.
- * config/arm/arm.md (*addsi3_compare0): Remove "*".
- * ddg.c (check_closing_branch_deps, get_node_of_insn_uid):
- New functions.
- (create_ddg): Pass sbitmap containing do-loop related
- instructions instead of closing_branch_deps parameter and call
- check_closing_branch_deps function.
- * ddg.h (create_ddg): Adjust the function declaration.
- * modulo-sched.c (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT
- and redefine.
- (doloop_register_get): Handle NONDEBUG_INSN_P.
- (stage_count): New field in struct partial_schedule.
- (mark_doloop_insns, calculate_stage_count): New functions.
- (normalize_sched_times): Rename to reset_sched_times and handle
- incrementing the sched time of the nodes by a constant value
- passed as parameter.
- (duplicate_insns_of_cycles): Skip closing branch.
- (sms_schedule_by_order): Schedule closing branch when
- closing_branch_deps is true.
- (ps_insn_find_column): Handle closing branch.
- (sms_schedule): Call reset_sched_times and handle case where
- do-loop pattern is not decoupled from the other loop instructions.
- Support new form of doloop pattern.
- (ps_insert_empty_row): Update calls to normalize_sched_times
- and rotate_partial_schedule functions.
-
-=== modified file 'gcc/config/arm/arm.md'
---- old/gcc/config/arm/arm.md 2011-03-11 14:26:34 +0000
-+++ new/gcc/config/arm/arm.md 2011-03-24 07:45:38 +0000
-@@ -734,7 +734,7 @@
- ""
- )
-
--(define_insn "*addsi3_compare0"
-+(define_insn "addsi3_compare0"
- [(set (reg:CC_NOOV CC_REGNUM)
- (compare:CC_NOOV
- (plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
-
-=== modified file 'gcc/config/arm/thumb2.md'
---- old/gcc/config/arm/thumb2.md 2011-02-08 10:51:58 +0000
-+++ new/gcc/config/arm/thumb2.md 2011-03-24 07:45:38 +0000
-@@ -1194,7 +1194,7 @@
- (set_attr "length" "2")]
- )
-
--(define_insn "*thumb2_addsi3_compare0"
-+(define_insn "thumb2_addsi3_compare0"
- [(set (reg:CC_NOOV CC_REGNUM)
- (compare:CC_NOOV
- (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
-@@ -1445,3 +1445,56 @@
- [(set_attr "length" "4,4,16")
- (set_attr "predicable" "yes")]
- )
-+
-+
-+;; Define the subtract-one-and-jump insns so loop.c
-+;; knows what to generate.
-+(define_expand "doloop_end"
-+ [(use (match_operand 0 "" "")) ; loop pseudo
-+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
-+ (use (match_operand 2 "" "")) ; max iterations
-+ (use (match_operand 3 "" "")) ; loop level
-+ (use (match_operand 4 "" ""))] ; label
-+ "TARGET_32BIT"
-+ "
-+ {
-+ /* Currently SMS relies on the do-loop pattern to recognize loops
-+ where (1) the control part consists of all insns defining and/or
-+ using a certain 'count' register and (2) the loop count can be
-+ adjusted by modifying this register prior to the loop.
-+ ??? The possible introduction of a new block to initialize the
-+ new IV can potentially affect branch optimizations. */
-+ if (optimize > 0 && flag_modulo_sched)
-+ {
-+ rtx s0;
-+ rtx bcomp;
-+ rtx loc_ref;
-+ rtx cc_reg;
-+ rtx insn;
-+ rtx cmp;
-+
-+ /* Only use this on innermost loops. */
-+ if (INTVAL (operands[3]) > 1)
-+ FAIL;
-+
-+ if (GET_MODE (operands[0]) != SImode)
-+ FAIL;
-+
-+ s0 = operands [0];
-+ if (TARGET_THUMB2)
-+ insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
-+ else
-+ insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
-+
-+ cmp = XVECEXP (PATTERN (insn), 0, 0);
-+ cc_reg = SET_DEST (cmp);
-+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
-+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
-+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
-+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
-+ loc_ref, pc_rtx)));
-+ DONE;
-+ }else
-+ FAIL;
-+ }")
-+
-
-=== modified file 'gcc/ddg.c'
---- old/gcc/ddg.c 2010-07-19 08:58:53 +0000
-+++ new/gcc/ddg.c 2011-03-24 07:45:38 +0000
-@@ -60,6 +60,8 @@
- static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type,
- dep_data_type, int, int);
- static void add_edge_to_ddg (ddg_ptr g, ddg_edge_ptr);
-+static ddg_node_ptr get_node_of_insn_uid (ddg_ptr, int);
-+
-
- /* Auxiliary variable for mem_read_insn_p/mem_write_insn_p. */
- static bool mem_ref_p;
-@@ -450,12 +452,65 @@
- sched_free_deps (head, tail, false);
- }
-
-+/* Given DOLOOP_INSNS which holds the instructions that
-+ belong to the do-loop part; mark closing_branch_deps field in ddg G
-+ as TRUE if the do-loop part's instructions are dependent on the other
-+ loop instructions. Otherwise mark it as FALSE. */
-+static void
-+check_closing_branch_deps (ddg_ptr g, sbitmap doloop_insns)
-+{
-+ sbitmap_iterator sbi;
-+ unsigned int u = 0;
-+
-+ EXECUTE_IF_SET_IN_SBITMAP (doloop_insns, 0, u, sbi)
-+ {
-+ ddg_edge_ptr e;
-+ ddg_node_ptr u_node = get_node_of_insn_uid (g, u);
-+
-+ gcc_assert (u_node);
-+
-+ for (e = u_node->in; e != 0; e = e->next_in)
-+ {
-+ ddg_node_ptr v_node = e->src;
-+
-+ if (((unsigned int) INSN_UID (v_node->insn) == u)
-+ || DEBUG_INSN_P (v_node->insn))
-+ continue;
-+
-+ /* Ignore dependencies between memory writes and the
-+ jump. */
-+ if (JUMP_P (u_node->insn)
-+ && e->type == OUTPUT_DEP
-+ && mem_write_insn_p (v_node->insn))
-+ continue;
-+ if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn)))
-+ {
-+ g->closing_branch_deps = 1;
-+ return;
-+ }
-+ }
-+ for (e = u_node->out; e != 0; e = e->next_out)
-+ {
-+ ddg_node_ptr v_node = e->dest;
-+
-+ if (((unsigned int) INSN_UID (v_node->insn) == u)
-+ || DEBUG_INSN_P (v_node->insn))
-+ continue;
-+ if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn)))
-+ {
-+ g->closing_branch_deps = 1;
-+ return;
-+ }
-+ }
-+ }
-+ g->closing_branch_deps = 0;
-+}
-
- /* Given a basic block, create its DDG and return a pointer to a variable
- of ddg type that represents it.
- Initialize the ddg structure fields to the appropriate values. */
- ddg_ptr
--create_ddg (basic_block bb, int closing_branch_deps)
-+create_ddg (basic_block bb, sbitmap doloop_insns)
- {
- ddg_ptr g;
- rtx insn, first_note;
-@@ -465,7 +520,6 @@
- g = (ddg_ptr) xcalloc (1, sizeof (struct ddg));
-
- g->bb = bb;
-- g->closing_branch_deps = closing_branch_deps;
-
- /* Count the number of insns in the BB. */
- for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
-@@ -538,6 +592,11 @@
- /* Build the data dependency graph. */
- build_intra_loop_deps (g);
- build_inter_loop_deps (g);
-+
-+ /* Check whether the do-loop part is decoupled from the other loop
-+ instructions. */
-+ check_closing_branch_deps (g, doloop_insns);
-+
- return g;
- }
-
-@@ -831,6 +890,18 @@
- return NULL;
- }
-
-+/* Given the uid of an instruction UID return the node that represents it. */
-+static ddg_node_ptr
-+get_node_of_insn_uid (ddg_ptr g, int uid)
-+{
-+ int i;
-+
-+ for (i = 0; i < g->num_nodes; i++)
-+ if (uid == INSN_UID (g->nodes[i].insn))
-+ return &g->nodes[i];
-+ return NULL;
-+}
-+
- /* Given a set OPS of nodes in the DDG, find the set of their successors
- which are not in OPS, and set their bits in SUCC. Bits corresponding to
- OPS are cleared from SUCC. Leaves the other bits in SUCC unchanged. */
-
-=== modified file 'gcc/ddg.h'
---- old/gcc/ddg.h 2009-11-25 10:55:54 +0000
-+++ new/gcc/ddg.h 2011-03-24 07:45:38 +0000
-@@ -167,7 +167,7 @@
- };
-
-
--ddg_ptr create_ddg (basic_block, int closing_branch_deps);
-+ddg_ptr create_ddg (basic_block, sbitmap);
- void free_ddg (ddg_ptr);
-
- void print_ddg (FILE *, ddg_ptr);
-
-=== modified file 'gcc/loop-doloop.c'
---- old/gcc/loop-doloop.c 2010-07-19 08:58:53 +0000
-+++ new/gcc/loop-doloop.c 2011-03-24 07:45:38 +0000
-@@ -78,6 +78,8 @@
- rtx inc_src;
- rtx condition;
- rtx pattern;
-+ rtx cc_reg = NULL_RTX;
-+ rtx reg_orig = NULL_RTX;
-
- /* The canonical doloop pattern we expect has one of the following
- forms:
-@@ -96,7 +98,16 @@
- 2) (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
- (label_ref (label))
-- (pc))). */
-+ (pc))).
-+
-+ Some targets (ARM) do the comparison before the branch, as in the
-+ following form:
-+
-+ 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
-+ (set (reg) (plus (reg) (const_int -1)))])
-+ (set (pc) (if_then_else (cc == NE)
-+ (label_ref (label))
-+ (pc))) */
-
- pattern = PATTERN (doloop_pat);
-
-@@ -104,19 +115,47 @@
- {
- rtx cond;
- rtx prev_insn = prev_nondebug_insn (doloop_pat);
-+ rtx cmp_arg1, cmp_arg2;
-+ rtx cmp_orig;
-
-- /* We expect the decrement to immediately precede the branch. */
-+ /* In case the pattern is not PARALLEL we expect two forms
-+ of doloop which are cases 2) and 3) above: in case 2) the
-+ decrement immediately precedes the branch, while in case 3)
-+ the compare and decrement instructions immediately precede
-+ the branch. */
-
- if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
- return 0;
-
- cmp = pattern;
-- inc = PATTERN (PREV_INSN (doloop_pat));
-+ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
-+ {
-+ /* The third case: the compare and decrement instructions
-+ immediately precede the branch. */
-+ cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
-+ if (GET_CODE (cmp_orig) != SET)
-+ return 0;
-+ if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
-+ return 0;
-+ cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
-+ cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
-+ if (cmp_arg2 != const0_rtx
-+ || GET_CODE (cmp_arg1) != PLUS)
-+ return 0;
-+ reg_orig = XEXP (cmp_arg1, 0);
-+ if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
-+ || !REG_P (reg_orig))
-+ return 0;
-+ cc_reg = SET_DEST (cmp_orig);
-+
-+ inc = XVECEXP (PATTERN (prev_insn), 0, 1);
-+ }
-+ else
-+ inc = PATTERN (prev_insn);
- /* We expect the condition to be of the form (reg != 0) */
- cond = XEXP (SET_SRC (cmp), 0);
- if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
- return 0;
--
- }
- else
- {
-@@ -162,11 +201,15 @@
- return 0;
-
- if ((XEXP (condition, 0) == reg)
-+ /* For the third case: */
-+ || ((cc_reg != NULL_RTX)
-+ && (XEXP (condition, 0) == cc_reg)
-+ && (reg_orig == reg))
- || (GET_CODE (XEXP (condition, 0)) == PLUS
-- && XEXP (XEXP (condition, 0), 0) == reg))
-+ && XEXP (XEXP (condition, 0), 0) == reg))
- {
- if (GET_CODE (pattern) != PARALLEL)
-- /* The second form we expect:
-+ /* For the second form we expect:
-
- (set (reg) (plus (reg) (const_int -1))
- (set (pc) (if_then_else (reg != 0)
-@@ -181,7 +224,24 @@
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
-
-- So we return that form instead.
-+ For the third form we expect:
-+
-+ (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
-+ (set (reg) (plus (reg) (const_int -1)))])
-+ (set (pc) (if_then_else (cc == NE)
-+ (label_ref (label))
-+ (pc)))
-+
-+ which is equivalent to the following:
-+
-+ (parallel [(set (cc) (compare (reg, 1))
-+ (set (reg) (plus (reg) (const_int -1)))
-+ (set (pc) (if_then_else (NE == cc)
-+ (label_ref (label))
-+ (pc))))])
-+
-+ So we return the second form instead for the two cases.
-+
- */
- condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
-
-
-=== modified file 'gcc/modulo-sched.c'
---- old/gcc/modulo-sched.c 2009-11-25 10:55:54 +0000
-+++ new/gcc/modulo-sched.c 2011-03-24 07:45:38 +0000
-@@ -116,8 +116,10 @@
-
- /* The number of different iterations the nodes in ps span, assuming
- the stage boundaries are placed efficiently. */
--#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
-- + 1 + (ps)->ii - 1) / (ps)->ii)
-+#define CALC_STAGE_COUNT(min_cycle,max_cycle,ii) ((max_cycle - min_cycle \
-+ + 1 + ii - 1) / ii)
-+/* The stage count of ps. */
-+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
-
- /* A single instruction in the partial schedule. */
- struct ps_insn
-@@ -155,6 +157,8 @@
- int max_cycle;
-
- ddg_ptr g; /* The DDG of the insns in the partial schedule. */
-+
-+ int stage_count; /* The stage count of the partial schedule. */
- };
-
- /* We use this to record all the register replacements we do in
-@@ -195,6 +199,7 @@
- rtx, rtx);
- static void duplicate_insns_of_cycles (partial_schedule_ptr,
- int, int, int, rtx);
-+static int calculate_stage_count (partial_schedule_ptr ps);
-
- #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
- #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
-@@ -310,10 +315,10 @@
- either a single (parallel) branch-on-count or a (non-parallel)
- branch immediately preceded by a single (decrement) insn. */
- first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
-- : PREV_INSN (tail));
-+ : prev_nondebug_insn (tail));
-
- for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
-- if (reg_mentioned_p (reg, insn))
-+ if (reg_mentioned_p (reg, insn) && NONDEBUG_INSN_P (insn))
- {
- if (dump_file)
- {
-@@ -332,6 +337,24 @@
- #endif
- }
-
-+/* Mark in DOLOOP_INSNS the instructions that belong to the do-loop part.
-+ Use TAIL to recognize that part. */
-+static void
-+mark_doloop_insns (sbitmap doloop_insns, rtx tail)
-+{
-+ rtx first_insn_not_to_check, insn;
-+
-+ /* This is the first instruction which belongs the doloop part. */
-+ first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
-+ : prev_nondebug_insn (tail));
-+
-+ sbitmap_zero (doloop_insns);
-+ for (insn = first_insn_not_to_check; insn != NEXT_INSN (tail);
-+ insn = NEXT_INSN (insn))
-+ if (NONDEBUG_INSN_P (insn))
-+ SET_BIT (doloop_insns, INSN_UID (insn));
-+}
-+
- /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
- that the number of iterations is a compile-time constant. If so,
- return the rtx that sets COUNT_REG to a constant, and set COUNT to
-@@ -569,13 +592,12 @@
- }
- }
-
--/* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
-- of SCHED_ROW and SCHED_STAGE. */
-+/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
-+ SCHED_ROW and SCHED_STAGE. */
- static void
--normalize_sched_times (partial_schedule_ptr ps)
-+reset_sched_times (partial_schedule_ptr ps, int amount)
- {
- int row;
-- int amount = PS_MIN_CYCLE (ps);
- int ii = ps->ii;
- ps_insn_ptr crr_insn;
-
-@@ -584,6 +606,10 @@
- {
- ddg_node_ptr u = crr_insn->node;
- int normalized_time = SCHED_TIME (u) - amount;
-+ int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
-+ /* The first cycle in row zero after the rotation. */
-+ int new_first_cycle_in_row_zero =
-+ new_min_cycle + ii - SMODULO (new_min_cycle, ii);
-
- if (dump_file)
- fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
-@@ -592,8 +618,30 @@
- gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
- gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
-- SCHED_ROW (u) = normalized_time % ii;
-- SCHED_STAGE (u) = normalized_time / ii;
-+ crr_insn->cycle = normalized_time;
-+ SCHED_ROW (u) = SMODULO (normalized_time, ii);
-+
-+ /* If min_cycle is in row zero after the rotation then
-+ the stage count can be calculated by dividing the cycle
-+ with ii. Otherwise, the calculation is done by dividing the
-+ SMSed kernel into two intervals:
-+
-+ 1) min_cycle <= interval 0 < first_cycle_in_row_zero
-+ 2) first_cycle_in_row_zero <= interval 1 < max_cycle
-+
-+ Cycles in interval 0 are in stage 0. The stage of cycles
-+ in interval 1 should be added by 1 to take interval 0 into
-+ account. */
-+ if (SMODULO (new_min_cycle, ii) == 0)
-+ SCHED_STAGE (u) = normalized_time / ii;
-+ else
-+ {
-+ if (crr_insn->cycle < new_first_cycle_in_row_zero)
-+ SCHED_STAGE (u) = 0;
-+ else
-+ SCHED_STAGE (u) =
-+ ((SCHED_TIME (u) - new_first_cycle_in_row_zero) / ii) + 1;
-+ }
- }
- }
-
-@@ -646,9 +694,12 @@
-
- /* Do not duplicate any insn which refers to count_reg as it
- belongs to the control part.
-+ If closing_branch_deps is true the closing branch is scheduled
-+ as well and thus should be ignored.
- TODO: This should be done by analyzing the control part of
- the loop. */
-- if (reg_mentioned_p (count_reg, u_node->insn))
-+ if (reg_mentioned_p (count_reg, u_node->insn)
-+ || JUMP_P (ps_ij->node->insn))
- continue;
-
- if (for_prolog)
-@@ -894,7 +945,8 @@
- basic_block condition_bb = NULL;
- edge latch_edge;
- gcov_type trip_count = 0;
--
-+ sbitmap doloop_insns;
-+
- loop_optimizer_init (LOOPS_HAVE_PREHEADERS
- | LOOPS_HAVE_RECORDED_EXITS);
- if (number_of_loops () <= 1)
-@@ -919,6 +971,7 @@
- setup_sched_infos ();
- haifa_sched_init ();
-
-+ doloop_insns = sbitmap_alloc (get_max_uid () + 1);
- /* Allocate memory to hold the DDG array one entry for each loop.
- We use loop->num as index into this array. */
- g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
-@@ -1009,9 +1062,11 @@
- continue;
- }
-
-- /* Don't handle BBs with calls or barriers, or !single_set insns,
-- or auto-increment insns (to avoid creating invalid reg-moves
-- for the auto-increment insns).
-+ /* Don't handle BBs with calls or barriers or auto-increment insns
-+ (to avoid creating invalid reg-moves for the auto-increment insns),
-+ or !single_set with the exception of instructions that include
-+ count_reg---these instructions are part of the control part
-+ that do-loop recognizes.
- ??? Should handle auto-increment insns.
- ??? Should handle insns defining subregs. */
- for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
-@@ -1021,7 +1076,8 @@
- if (CALL_P (insn)
- || BARRIER_P (insn)
- || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
-- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
-+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
-+ && !reg_mentioned_p (count_reg, insn))
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
- || (INSN_P (insn) && (set = single_set (insn))
- && GET_CODE (SET_DEST (set)) == SUBREG))
-@@ -1048,14 +1104,16 @@
-
- continue;
- }
--
-- if (! (g = create_ddg (bb, 0)))
-+ mark_doloop_insns (doloop_insns, tail);
-+ if (! (g = create_ddg (bb, doloop_insns)))
- {
- if (dump_file)
- fprintf (dump_file, "SMS create_ddg failed\n");
- continue;
- }
--
-+ if (dump_file)
-+ fprintf (dump_file, "SMS closing_branch_deps: %d\n",
-+ g->closing_branch_deps);
- g_arr[loop->num] = g;
- if (dump_file)
- fprintf (dump_file, "...OK\n");
-@@ -1157,11 +1215,13 @@
-
- ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
-- if (ps){
-- stage_count = PS_STAGE_COUNT (ps);
-- gcc_assert(stage_count >= 1);
-- }
--
-+ if (ps)
-+ {
-+ stage_count = calculate_stage_count (ps);
-+ gcc_assert(stage_count >= 1);
-+ PS_STAGE_COUNT(ps) = stage_count;
-+ }
-+
- /* Stage count of 1 means that there is no interleaving between
- iterations, let the scheduling passes do the job. */
- if (stage_count <= 1
-@@ -1182,17 +1242,7 @@
- else
- {
- struct undo_replace_buff_elem *reg_move_replaces;
--
-- if (dump_file)
-- {
-- fprintf (dump_file,
-- "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
-- stage_count);
-- print_partial_schedule (ps, dump_file);
-- fprintf (dump_file,
-- "SMS Branch (%d) will later be scheduled at cycle %d.\n",
-- g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
-- }
-+ int amount;
-
- /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
- the closing_branch was scheduled and should appear in the last (ii-1)
-@@ -1202,12 +1252,28 @@
- TODO: Revisit the issue of scheduling the insns of the
- control part relative to the branch when the control part
- has more than one insn. */
-- normalize_sched_times (ps);
-- rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
-+ amount = (g->closing_branch_deps)? SCHED_TIME (g->closing_branch) + 1:
-+ PS_MIN_CYCLE (ps);
-+ reset_sched_times (ps, amount);
-+ rotate_partial_schedule (ps, amount);
-+
- set_columns_for_ps (ps);
-
- canon_loop (loop);
-
-+ if (dump_file)
-+ {
-+ fprintf (dump_file,
-+ "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
-+ stage_count);
-+ print_partial_schedule (ps, dump_file);
-+ if (!g->closing_branch_deps)
-+ fprintf (dump_file,
-+ "SMS Branch (%d) will later be scheduled at \
-+ cycle %d.\n",
-+ g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
-+ }
-+
- /* case the BCT count is not known , Do loop-versioning */
- if (count_reg && ! count_init)
- {
-@@ -1252,6 +1318,7 @@
- }
-
- free (g_arr);
-+ sbitmap_free (doloop_insns);
-
- /* Release scheduler data, needed until now because of DFA. */
- haifa_sched_finish ();
-@@ -1759,8 +1826,9 @@
- RESET_BIT (tobe_scheduled, u);
- continue;
- }
--
-- if (JUMP_P (insn)) /* Closing branch handled later. */
-+ /* Closing branch handled later unless closing_branch_deps
-+ is true. */
-+ if (JUMP_P (insn) && !g->closing_branch_deps)
- {
- RESET_BIT (tobe_scheduled, u);
- continue;
-@@ -1893,8 +1961,8 @@
- if (dump_file)
- fprintf (dump_file, "split_row=%d\n", split_row);
-
-- normalize_sched_times (ps);
-- rotate_partial_schedule (ps, ps->min_cycle);
-+ reset_sched_times (ps, PS_MIN_CYCLE (ps));
-+ rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
-
- rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
- for (row = 0; row < split_row; row++)
-@@ -2571,6 +2639,7 @@
- ps_insn_ptr next_ps_i;
- ps_insn_ptr first_must_follow = NULL;
- ps_insn_ptr last_must_precede = NULL;
-+ ps_insn_ptr last_in_row = NULL;
- int row;
-
- if (! ps_i)
-@@ -2597,8 +2666,37 @@
- else
- last_must_precede = next_ps_i;
- }
-+ /* The closing branch must be the last in the row. */
-+ if (must_precede
-+ && TEST_BIT (must_precede, next_ps_i->node->cuid)
-+ && JUMP_P (next_ps_i->node->insn))
-+ return false;
-+
-+ last_in_row = next_ps_i;
- }
-
-+ /* If closing_branch_deps is true we are scheduling the closing
-+ branch as well. Make sure there is no dependent instruction after
-+ it as the branch should be the last instruction. */
-+ if (JUMP_P (ps_i->node->insn))
-+ {
-+ if (first_must_follow)
-+ return false;
-+ if (last_in_row)
-+ {
-+ /* Make the branch the last in the row. New instructions
-+ will be inserted at the beginning of the row or after the
-+ last must_precede instruction thus the branch is guaranteed
-+ to remain the last instruction in the row. */
-+ last_in_row->next_in_row = ps_i;
-+ ps_i->prev_in_row = last_in_row;
-+ ps_i->next_in_row = NULL;
-+ }
-+ else
-+ ps->rows[row] = ps_i;
-+ return true;
-+ }
-+
- /* Now insert the node after INSERT_AFTER_PSI. */
-
- if (! last_must_precede)
-@@ -2820,6 +2918,54 @@
- return ps_i;
- }
-
-+/* Calculate the stage count of the partial schedule PS. */
-+int
-+calculate_stage_count (partial_schedule_ptr ps)
-+{
-+ int stage_count;
-+
-+ /* If closing_branch_deps is false then the stage
-+ boundaries are placed efficiently, meaning that min_cycle will be
-+ placed at row 0. Otherwise, the closing branch will be placed in
-+ row ii-1. For the later case we assume the final SMSed kernel can
-+ be divided into two intervals. This assumption is used for the
-+ stage count calculation:
-+
-+ 1) min_cycle <= interval 0 < first_cycle_in_row_zero
-+ 2) first_cycle_in_row_zero <= interval 1 < max_cycle
-+ */
-+ stage_count =
-+ CALC_STAGE_COUNT (PS_MIN_CYCLE (ps), PS_MAX_CYCLE (ps), ps->ii);
-+ if (ps->g->closing_branch_deps)
-+ {
-+ int new_min_cycle;
-+ int new_min_cycle_row;
-+ int rotation_amount = SCHED_TIME (ps->g->closing_branch) + 1;
-+
-+ /* This is the new value of min_cycle after the final rotation to
-+ bring closing branch into row ii-1. */
-+ new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
-+ /* This is the row which the the new min_cycle will be placed in. */
-+ new_min_cycle_row = SMODULO (new_min_cycle, ps->ii);
-+ /* If the row of min_cycle is zero then interval 0 is empty.
-+ Otherwise, we need to calculate interval 1 and add it by one
-+ to take interval 0 into account. */
-+ if (new_min_cycle_row != 0)
-+ {
-+ int new_max_cycle, first_cycle_in_row_zero;
-+
-+ new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
-+ first_cycle_in_row_zero =
-+ new_min_cycle + ps->ii - new_min_cycle_row;
-+
-+ stage_count =
-+ CALC_STAGE_COUNT (first_cycle_in_row_zero, new_max_cycle,
-+ ps->ii) + 1;
-+ }
-+ }
-+ return stage_count;
-+}
-+
- /* Rotate the rows of PS such that insns scheduled at time
- START_CYCLE will appear in row 0. Updates max/min_cycles. */
- void
-