aboutsummaryrefslogtreecommitdiffstats
path: root/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch
diff options
context:
space:
mode:
Diffstat (limited to 'toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch')
-rw-r--r--toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch663
1 files changed, 0 insertions, 663 deletions
diff --git a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch b/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch
deleted file mode 100644
index be102160c5..0000000000
--- a/toolchain-layer/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99371.patch
+++ /dev/null
@@ -1,663 +0,0 @@
-2010-08-24 Andrew Stubbs <ams@codesourcery.com>
-
- Backport from FSF:
-
- 2010-08-07 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-
- * config/arm/cortex-a9.md: Rewrite VFP Pipeline description.
- * config/arm/arm.c (arm_xscale_tune): Initialize sched_adjust_cost.
- (arm_fastmul_tune,arm_slowmul_tune, arm_9e_tune): Likewise.
- (arm_adjust_cost): Split into xscale_sched_adjust_cost and a
- generic part.
- (cortex_a9_sched_adjust_cost): New function.
- (xscale_sched_adjust_cost): New function.
- * config/arm/arm-protos.h (struct tune_params): New field
- sched_adjust_cost.
- * config/arm/arm-cores.def: Adjust costs for cortex-a9.
-
- 2010-04-17 Richard Earnshaw <rearnsha@arm.com>
-
- * arm-protos.h (tune_params): New structure.
- * arm.c (current_tune): New variable.
- (arm_constant_limit): Delete.
- (struct processors): Add pointer to the tune parameters.
- (arm_slowmul_tune): New tuning option.
- (arm_fastmul_tune, arm_xscale_tune, arm_9e_tune): Likewise.
- (all_cores): Adjust to pick up the tuning model.
- (arm_constant_limit): New function.
- (arm_override_options): Select the appropriate tuning model. Delete
- initialization of arm_const_limit.
- (arm_split_constant): Use the new constant-limit model.
- (arm_rtx_costs): Pick up the current tuning model.
- * arm.md (is_strongarm, is_xscale): Delete.
- * arm-generic.md (load_ldsched_x, load_ldsched): Test explicitly
- for Xscale variant architectures.
- (mult_ldsched_strongarm, mult_ldsched): Similarly for StrongARM.
-
- 2010-08-23 Andrew Stubbs <ams@codesourcery.com>
-
- Backport from FSF:
-
-=== modified file 'gcc/config/arm/arm-cores.def'
---- old/gcc/config/arm/arm-cores.def 2010-07-29 15:53:39 +0000
-+++ new/gcc/config/arm/arm-cores.def 2010-08-24 13:15:54 +0000
-@@ -120,7 +120,7 @@
- ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e)
- ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e)
- ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
--ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e)
-+ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
- ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
- ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
- ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e)
-
-=== modified file 'gcc/config/arm/arm-generic.md'
---- old/gcc/config/arm/arm-generic.md 2007-08-02 09:49:31 +0000
-+++ new/gcc/config/arm/arm-generic.md 2010-08-24 13:15:54 +0000
-@@ -104,14 +104,14 @@
- (and (eq_attr "generic_sched" "yes")
- (and (eq_attr "ldsched" "yes")
- (and (eq_attr "type" "load_byte,load1")
-- (eq_attr "is_xscale" "yes"))))
-+ (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
- "core")
-
- (define_insn_reservation "load_ldsched" 2
- (and (eq_attr "generic_sched" "yes")
- (and (eq_attr "ldsched" "yes")
- (and (eq_attr "type" "load_byte,load1")
-- (eq_attr "is_xscale" "no"))))
-+ (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
- "core")
-
- (define_insn_reservation "load_or_store" 2
-@@ -128,14 +128,16 @@
- (define_insn_reservation "mult_ldsched_strongarm" 3
- (and (eq_attr "generic_sched" "yes")
- (and (eq_attr "ldsched" "yes")
-- (and (eq_attr "is_strongarm" "yes")
-+ (and (eq_attr "tune"
-+ "strongarm,strongarm110,strongarm1100,strongarm1110")
- (eq_attr "type" "mult"))))
- "core*2")
-
- (define_insn_reservation "mult_ldsched" 4
- (and (eq_attr "generic_sched" "yes")
- (and (eq_attr "ldsched" "yes")
-- (and (eq_attr "is_strongarm" "no")
-+ (and (eq_attr "tune"
-+ "!strongarm,strongarm110,strongarm1100,strongarm1110")
- (eq_attr "type" "mult"))))
- "core*4")
-
-
-=== modified file 'gcc/config/arm/arm-protos.h'
---- old/gcc/config/arm/arm-protos.h 2010-08-10 13:31:21 +0000
-+++ new/gcc/config/arm/arm-protos.h 2010-08-24 13:15:54 +0000
-@@ -214,4 +214,17 @@
-
- extern void arm_order_regs_for_local_alloc (void);
-
-+#ifdef RTX_CODE
-+/* This needs to be here because we need RTX_CODE and similar. */
-+
-+struct tune_params
-+{
-+ bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
-+ bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
-+ int constant_limit;
-+};
-+
-+extern const struct tune_params *current_tune;
-+#endif /* RTX_CODE */
-+
- #endif /* ! GCC_ARM_PROTOS_H */
-
-=== modified file 'gcc/config/arm/arm.c'
---- old/gcc/config/arm/arm.c 2010-08-20 16:21:01 +0000
-+++ new/gcc/config/arm/arm.c 2010-08-24 13:15:54 +0000
-@@ -228,6 +228,8 @@
- static void arm_trampoline_init (rtx, tree, rtx);
- static rtx arm_trampoline_adjust_address (rtx);
- static rtx arm_pic_static_addr (rtx orig, rtx reg);
-+static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
-+static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
- static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
- static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
- const_tree type,
-@@ -545,6 +547,9 @@
- /* The processor for which instructions should be scheduled. */
- enum processor_type arm_tune = arm_none;
-
-+/* The current tuning set. */
-+const struct tune_params *current_tune;
-+
- /* The default processor used if not overridden by commandline. */
- static enum processor_type arm_default_cpu = arm_none;
-
-@@ -720,9 +725,6 @@
- the next function. */
- static int after_arm_reorg = 0;
-
--/* The maximum number of insns to be used when loading a constant. */
--static int arm_constant_limit = 3;
--
- enum arm_pcs arm_pcs_default;
-
- /* For an explanation of these variables, see final_prescan_insn below. */
-@@ -761,8 +763,44 @@
- enum processor_type core;
- const char *arch;
- const unsigned long flags;
-- bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
--};
-+ const struct tune_params *const tune;
-+};
-+
-+const struct tune_params arm_slowmul_tune =
-+{
-+ arm_slowmul_rtx_costs,
-+ NULL,
-+ 3
-+};
-+
-+const struct tune_params arm_fastmul_tune =
-+{
-+ arm_fastmul_rtx_costs,
-+ NULL,
-+ 1
-+};
-+
-+const struct tune_params arm_xscale_tune =
-+{
-+ arm_xscale_rtx_costs,
-+ xscale_sched_adjust_cost,
-+ 2
-+};
-+
-+const struct tune_params arm_9e_tune =
-+{
-+ arm_9e_rtx_costs,
-+ NULL,
-+ 1
-+};
-+
-+const struct tune_params arm_cortex_a9_tune =
-+{
-+ arm_9e_rtx_costs,
-+ cortex_a9_sched_adjust_cost,
-+ 1
-+};
-+
-
- /* Not all of these give usefully different compilation alternatives,
- but there is no simple way of generalizing them. */
-@@ -770,7 +808,7 @@
- {
- /* ARM Cores */
- #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
-- {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
-+ {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
- #include "arm-cores.def"
- #undef ARM_CORE
- {NULL, arm_none, NULL, 0, NULL}
-@@ -779,7 +817,7 @@
- static const struct processors all_architectures[] =
- {
- /* ARM Architectures */
-- /* We don't specify rtx_costs here as it will be figured out
-+ /* We don't specify tuning costs here as it will be figured out
- from the core. */
-
- {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
-@@ -928,6 +966,13 @@
- TLS_LE32
- };
-
-+/* The maximum number of insns to be used when loading a constant. */
-+inline static int
-+arm_constant_limit (bool size_p)
-+{
-+ return size_p ? 1 : current_tune->constant_limit;
-+}
-+
- /* Emit an insn that's a simple single-set. Both the operands must be known
- to be valid. */
- inline static rtx
-@@ -1478,6 +1523,7 @@
- }
-
- tune_flags = all_cores[(int)arm_tune].flags;
-+ current_tune = all_cores[(int)arm_tune].tune;
-
- if (target_fp16_format_name)
- {
-@@ -1875,26 +1921,12 @@
-
- if (optimize_size)
- {
-- arm_constant_limit = 1;
--
- /* If optimizing for size, bump the number of instructions that we
- are prepared to conditionally execute (even on a StrongARM). */
- max_insns_skipped = 6;
- }
- else
- {
-- /* For processors with load scheduling, it never costs more than
-- 2 cycles to load a constant, and the load scheduler may well
-- reduce that to 1. */
-- if (arm_ld_sched)
-- arm_constant_limit = 1;
--
-- /* On XScale the longer latency of a load makes it more difficult
-- to achieve a good schedule, so it's faster to synthesize
-- constants that can be done in two insns. */
-- if (arm_tune_xscale)
-- arm_constant_limit = 2;
--
- /* StrongARM has early execution of branches, so a sequence
- that is worth skipping is shorter. */
- if (arm_tune_strongarm)
-@@ -2423,7 +2455,8 @@
- && !cond
- && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
- 1, 0)
-- > arm_constant_limit + (code != SET)))
-+ > (arm_constant_limit (optimize_function_for_size_p (cfun))
-+ + (code != SET))))
- {
- if (code == SET)
- {
-@@ -7771,9 +7804,9 @@
- (enum rtx_code) outer_code, total);
- }
- else
-- return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
-- (enum rtx_code) outer_code,
-- total, speed);
-+ return current_tune->rtx_costs (x, (enum rtx_code) code,
-+ (enum rtx_code) outer_code,
-+ total, speed);
- }
-
- /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
-@@ -7918,7 +7951,8 @@
- so it can be ignored. */
-
- static bool
--arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
-+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
-+ int *total, bool speed)
- {
- enum machine_mode mode = GET_MODE (x);
-
-@@ -8119,15 +8153,15 @@
- return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
- }
-
--static int
--arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
-+/* Adjust cost hook for XScale. */
-+static bool
-+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
- {
- rtx i_pat, d_pat;
-
- /* Some true dependencies can have a higher cost depending
- on precisely how certain input operands are used. */
-- if (arm_tune_xscale
-- && REG_NOTE_KIND (link) == 0
-+ if (REG_NOTE_KIND (link) == 0
- && recog_memoized (insn) >= 0
- && recog_memoized (dep) >= 0)
- {
-@@ -8161,10 +8195,106 @@
-
- if (reg_overlap_mentioned_p (recog_data.operand[opno],
- shifted_operand))
-- return 2;
-+ {
-+ *cost = 2;
-+ return false;
-+ }
- }
- }
- }
-+ return true;
-+}
-+
-+/* Adjust cost hook for Cortex A9. */
-+static bool
-+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
-+{
-+ switch (REG_NOTE_KIND (link))
-+ {
-+ case REG_DEP_ANTI:
-+ *cost = 0;
-+ return false;
-+
-+ case REG_DEP_TRUE:
-+ case REG_DEP_OUTPUT:
-+ if (recog_memoized (insn) >= 0
-+ && recog_memoized (dep) >= 0)
-+ {
-+ if (GET_CODE (PATTERN (insn)) == SET)
-+ {
-+ if (GET_MODE_CLASS
-+ (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
-+ || GET_MODE_CLASS
-+ (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
-+ {
-+ enum attr_type attr_type_insn = get_attr_type (insn);
-+ enum attr_type attr_type_dep = get_attr_type (dep);
-+
-+ /* By default all dependencies of the form
-+ s0 = s0 <op> s1
-+ s0 = s0 <op> s2
-+ have an extra latency of 1 cycle because
-+ of the input and output dependency in this
-+ case. However this gets modeled as an true
-+ dependency and hence all these checks. */
-+ if (REG_P (SET_DEST (PATTERN (insn)))
-+ && REG_P (SET_DEST (PATTERN (dep)))
-+ && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
-+ SET_DEST (PATTERN (dep))))
-+ {
-+ /* FMACS is a special case where the dependant
-+ instruction can be issued 3 cycles before
-+ the normal latency in case of an output
-+ dependency. */
-+ if ((attr_type_insn == TYPE_FMACS
-+ || attr_type_insn == TYPE_FMACD)
-+ && (attr_type_dep == TYPE_FMACS
-+ || attr_type_dep == TYPE_FMACD))
-+ {
-+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
-+ *cost = insn_default_latency (dep) - 3;
-+ else
-+ *cost = insn_default_latency (dep);
-+ return false;
-+ }
-+ else
-+ {
-+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
-+ *cost = insn_default_latency (dep) + 1;
-+ else
-+ *cost = insn_default_latency (dep);
-+ }
-+ return false;
-+ }
-+ }
-+ }
-+ }
-+ break;
-+
-+ default:
-+ gcc_unreachable ();
-+ }
-+
-+ return true;
-+}
-+
-+/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
-+ It corrects the value of COST based on the relationship between
-+ INSN and DEP through the dependence LINK. It returns the new
-+ value. There is a per-core adjust_cost hook to adjust scheduler costs
-+ and the per-core hook can choose to completely override the generic
-+ adjust_cost function. Only put bits of code into arm_adjust_cost that
-+ are common across all cores. */
-+static int
-+arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
-+{
-+ rtx i_pat, d_pat;
-+
-+ if (current_tune->sched_adjust_cost != NULL)
-+ {
-+ if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
-+ return cost;
-+ }
-
- /* XXX This is not strictly true for the FPA. */
- if (REG_NOTE_KIND (link) == REG_DEP_ANTI
-@@ -8187,7 +8317,8 @@
- constant pool are cached, and that others will miss. This is a
- hack. */
-
-- if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
-+ if ((GET_CODE (src_mem) == SYMBOL_REF
-+ && CONSTANT_POOL_ADDRESS_P (src_mem))
- || reg_mentioned_p (stack_pointer_rtx, src_mem)
- || reg_mentioned_p (frame_pointer_rtx, src_mem)
- || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
-
-=== modified file 'gcc/config/arm/arm.md'
---- old/gcc/config/arm/arm.md 2010-08-23 14:39:12 +0000
-+++ new/gcc/config/arm/arm.md 2010-08-24 13:15:54 +0000
-@@ -150,13 +150,6 @@
- ; patterns that share the same RTL in both ARM and Thumb code.
- (define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
-
--; IS_STRONGARM is set to 'yes' when compiling for StrongARM, it affects
--; scheduling decisions for the load unit and the multiplier.
--(define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_tune_strongarm")))
--
--; IS_XSCALE is set to 'yes' when compiling for XScale.
--(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_tune_xscale")))
--
- ;; Operand number of an input operand that is shifted. Zero if the
- ;; given instruction does not shift one of its input operands.
- (define_attr "shift" "" (const_int 0))
-
-=== modified file 'gcc/config/arm/cortex-a9.md'
---- old/gcc/config/arm/cortex-a9.md 2009-10-31 16:40:03 +0000
-+++ new/gcc/config/arm/cortex-a9.md 2010-08-24 13:15:54 +0000
-@@ -2,8 +2,10 @@
- ;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
- ;; Originally written by CodeSourcery for VFP.
- ;;
--;; Integer core pipeline description contributed by ARM Ltd.
--;;
-+;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+;; Integer Pipeline description contributed by ARM Ltd.
-+;; VFP Pipeline description rewritten and contributed by ARM Ltd.
-+
- ;; This file is part of GCC.
- ;;
- ;; GCC is free software; you can redistribute it and/or modify it
-@@ -22,28 +24,27 @@
-
- (define_automaton "cortex_a9")
-
--;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has
-+;; The Cortex-A9 core is modelled as a dual issue pipeline that has
- ;; the following components.
- ;; 1. 1 Load Store Pipeline.
- ;; 2. P0 / main pipeline for data processing instructions.
- ;; 3. P1 / Dual pipeline for Data processing instructions.
- ;; 4. MAC pipeline for multiply as well as multiply
- ;; and accumulate instructions.
--;; 5. 1 VFP / Neon pipeline.
--;; The Load/Store and VFP/Neon pipeline are multiplexed.
-+;; 5. 1 VFP and an optional Neon unit.
-+;; The Load/Store, VFP and Neon issue pipeline are multiplexed.
- ;; The P0 / main pipeline and M1 stage of the MAC pipeline are
- ;; multiplexed.
- ;; The P1 / dual pipeline and M2 stage of the MAC pipeline are
- ;; multiplexed.
--;; There are only 4 register read ports and hence at any point of
-+;; There are only 4 integer register read ports and hence at any point of
- ;; time we can't have issue down the E1 and the E2 ports unless
- ;; of course there are bypass paths that get exercised.
- ;; Both P0 and P1 have 2 stages E1 and E2.
- ;; Data processing instructions issue to E1 or E2 depending on
- ;; whether they have an early shift or not.
-
--
--(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9")
-+(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")
- (define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
- (define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
- (define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
-@@ -71,11 +72,7 @@
-
- ;; Issue at the same time along the load store pipeline and
- ;; the VFP / Neon pipeline is not possible.
--;; FIXME:: At some point we need to model the issue
--;; of the load store and the vfp being shared rather than anything else.
--
--(exclusion_set "cortex_a9_ls" "cortex_a9_vfp")
--
-+(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")
-
- ;; Default data processing instruction without any shift
- ;; The only exception to this is the mov instruction
-@@ -101,18 +98,13 @@
-
- (define_insn_reservation "cortex_a9_load1_2" 4
- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "load1, load2, load_byte"))
-+ (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
- "cortex_a9_ls")
-
- ;; Loads multiples and store multiples can't be issued for 2 cycles in a
- ;; row. The description below assumes that addresses are 64 bit aligned.
- ;; If not, there is an extra cycle latency which is not modelled.
-
--;; FIXME:: This bit might need to be reworked when we get to
--;; tuning for the VFP because strictly speaking the ldm
--;; is sent to the LSU unit as is and there is only an
--;; issue restriction between the LSU and the VFP/ Neon unit.
--
- (define_insn_reservation "cortex_a9_load3_4" 5
- (and (eq_attr "tune" "cortexa9")
- (eq_attr "type" "load3, load4"))
-@@ -120,12 +112,13 @@
-
- (define_insn_reservation "cortex_a9_store1_2" 0
- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "store1, store2"))
-+ (eq_attr "type" "store1, store2, f_stores, f_stored"))
- "cortex_a9_ls")
-
- ;; Almost all our store multiples use an auto-increment
- ;; form. Don't issue back to back load and store multiples
- ;; because the load store unit will stall.
-+
- (define_insn_reservation "cortex_a9_store3_4" 0
- (and (eq_attr "tune" "cortexa9")
- (eq_attr "type" "store3, store4"))
-@@ -193,47 +186,79 @@
- (define_insn_reservation "cortex_a9_call" 0
- (and (eq_attr "tune" "cortexa9")
- (eq_attr "type" "call"))
-- "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp")
-+ "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")
-
-
- ;; Pipelining for VFP instructions.
--
--(define_insn_reservation "cortex_a9_ffarith" 1
-+;; Issue happens either along load store unit or the VFP / Neon unit.
-+;; Pipeline Instruction Classification.
-+;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r
-+;; FP_ADD - fadds, faddd, fcmps (1)
-+;; FPMUL - fmul{s,d}, fmac{s,d}
-+;; FPDIV - fdiv{s,d}
-+(define_cpu_unit "ca9fps" "cortex_a9")
-+(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")
-+(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")
-+(define_cpu_unit "ca9fp_ds1" "cortex_a9")
-+
-+
-+;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle.
-+(define_insn_reservation "cortex_a9_fps" 2
- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd"))
-- "cortex_a9_vfp")
-+ (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))
-+ "ca9_issue_vfp_neon + ca9fps")
-+
-+(define_bypass 1
-+ "cortex_a9_fps"
-+ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
-+
-+;; Scheduling on the FP_ADD pipeline.
-+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
-
- (define_insn_reservation "cortex_a9_fadd" 4
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fadds,faddd,f_cvt"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmuls" 5
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fmuls"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmuld" 6
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fmuld"))
-- "cortex_a9_vfp*2")
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fadds, faddd, f_cvt"))
-+ "ca9fp_add")
-+
-+(define_insn_reservation "cortex_a9_fcmp" 1
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fcmps, fcmpd"))
-+ "ca9_issue_vfp_neon + ca9fp_add1")
-+
-+;; Scheduling for the Multiply and MAC instructions.
-+(define_reservation "ca9fmuls"
-+ "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
-+
-+(define_reservation "ca9fmuld"
-+ "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
-+
-+(define_insn_reservation "cortex_a9_fmuls" 4
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fmuls"))
-+ "ca9fmuls")
-+
-+(define_insn_reservation "cortex_a9_fmuld" 5
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fmuld"))
-+ "ca9fmuld")
-
- (define_insn_reservation "cortex_a9_fmacs" 8
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fmacs"))
-- "cortex_a9_vfp")
--
--(define_insn_reservation "cortex_a9_fmacd" 8
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fmacd"))
-- "cortex_a9_vfp*2")
--
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fmacs"))
-+ "ca9fmuls, ca9fp_add")
-+
-+(define_insn_reservation "cortex_a9_fmacd" 9
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fmacd"))
-+ "ca9fmuld, ca9fp_add")
-+
-+;; Division pipeline description.
- (define_insn_reservation "cortex_a9_fdivs" 15
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fdivs"))
-- "cortex_a9_vfp*10")
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fdivs"))
-+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")
-
- (define_insn_reservation "cortex_a9_fdivd" 25
-- (and (eq_attr "tune" "cortexa9")
-- (eq_attr "type" "fdivd"))
-- "cortex_a9_vfp*20")
-+ (and (eq_attr "tune" "cortexa9")
-+ (eq_attr "type" "fdivd"))
-+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")
-