This patch: - maps branch-cirrus_insn to branch-nop-nop-cirrus_insn - maps branch-noncirrus-cirrus to branch-noncirrus-nop-cirrus - inserts a nop in load rN - load/store64 mvX,[rN] sequences to avoid an undocumented hardware bug. - always fixes up invalid code sequences when compiling hard Maverick insns and removes the -mcirrus-fix-invalid-insns flag because chip development has stopped and all existing silicon has these bugs, while the extra code that claimed to do other things for the extra bugs in the old revision D0 silicon was complete junk. - Takes the cirrus checking out of the main arm_reorg loop, to remove the speed penalty it caused when not compiling for Maverick. Martin Guy 3 March 2009 Index: gcc-4.2.4/gcc/config/arm/arm.c =================================================================== --- gcc-4.2.4.orig/gcc/config/arm/arm.c 2009-08-09 16:06:45.000000000 +0100 +++ gcc-4.2.4/gcc/config/arm/arm.c 2009-08-09 16:08:04.000000000 +0100 @@ -132,7 +132,7 @@ static int arm_address_cost (rtx); static bool arm_memory_load_p (rtx); static bool arm_cirrus_insn_p (rtx); -static void cirrus_reorg (rtx); +static void cirrus_reorg (void); static void arm_init_builtins (void); static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); static void arm_init_iwmmxt_builtins (void); @@ -5491,6 +5491,9 @@ body = PATTERN (insn); + if (GET_CODE (body) == COND_EXEC) + body = COND_EXEC_CODE (body); + if (GET_CODE (body) != SET) return false; @@ -5533,122 +5536,118 @@ /* Cirrus reorg for invalid instruction combinations. */ static void -cirrus_reorg (rtx first) +cirrus_reorg (void) { - enum attr_cirrus attr; - rtx body = PATTERN (first); - rtx t; - int nops; - - /* Any branch must be followed by 2 non Cirrus instructions. */ - if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN) - { - nops = 0; - t = next_nonnote_insn (first); - - if (arm_cirrus_insn_p (t)) - ++ nops; - - if (arm_cirrus_insn_p (next_nonnote_insn (t))) - ++ nops; - - while (nops --) - emit_insn_after (gen_nop (), first); - - return; - } - - /* (float (blah)) is in parallel with a clobber. */ - if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) - body = XVECEXP (body, 0, 0); - - if (GET_CODE (body) == SET) - { - rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1); + rtx insn, body; - /* cfldrd, cfldr64, cfstrd, cfstr64 must - be followed by a non Cirrus insn. */ - if (get_attr_cirrus (first) == CIRRUS_DOUBLE) - { - if (arm_cirrus_insn_p (next_nonnote_insn (first))) - emit_insn_after (gen_nop (), first); - - return; - } - else if (arm_memory_load_p (first)) - { - unsigned int arm_regno; + /* Examine every instruction and see if it needs adjusting */ + for (insn = get_insns (); insn; insn = next_insn (insn)) + switch (GET_CODE (insn)) + { + case JUMP_INSN: + /* Any branch must be followed by 2 non Cirrus instructions. */ + body = PATTERN (insn); + if (GET_CODE (body) != RETURN) + { + rtx next = next_real_insn (insn); - /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr, - ldr/cfmv64hr combination where the Rd field is the same - in both instructions must be split with a non Cirrus - insn. Example: + if (arm_cirrus_insn_p (next)) + { + emit_insn_after (gen_nop (), insn); + emit_insn_after (gen_nop (), insn); + } + else + if (arm_cirrus_insn_p (next_real_insn (next))) + emit_insn_after (gen_nop (), next); + } + break; + case INSN: + /* Any ldr/cfstrd combination where the Rd field is the same + in both instructions must be split with a non Cirrus insn. + Example: ldr r0, blah nop - cfmvsr mvf0, r0. */ - - /* Get Arm register number for ldr insn. */ - if (GET_CODE (lhs) == REG) - arm_regno = REGNO (lhs); - else - { - gcc_assert (GET_CODE (rhs) == REG); - arm_regno = REGNO (rhs); - } - - /* Next insn. */ - first = next_nonnote_insn (first); - - if (! arm_cirrus_insn_p (first)) - return; - - body = PATTERN (first); + cfstrd mvd0, [r0] + otherwise the FPU stores to random memory locations. + */ + body = PATTERN (insn); + + /* Also applies to conditionally executed ldr */ + if (GET_CODE (body) == COND_EXEC) + body = COND_EXEC_CODE (body); - /* (float (blah)) is in parallel with a clobber. */ - if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0)) - body = XVECEXP (body, 0, 0); - - if (GET_CODE (body) == FLOAT) - body = XEXP (body, 0); - - if (get_attr_cirrus (first) == CIRRUS_MOVE - && GET_CODE (XEXP (body, 1)) == REG - && arm_regno == REGNO (XEXP (body, 1))) - emit_insn_after (gen_nop (), first); - - return; - } - } + /* If first insn is ldr rN, ... */ + if (GET_CODE (body) == SET && arm_memory_load_p (insn)) + { + rtx next = next_real_insn (insn); - /* get_attr cannot accept USE or CLOBBER. */ - if (!first - || GET_CODE (first) != INSN - || GET_CODE (PATTERN (first)) == USE - || GET_CODE (PATTERN (first)) == CLOBBER) - return; + /* ...and second is cirrus double word load or store... */ + if (arm_cirrus_insn_p (next) + && get_attr_cirrus (next) == CIRRUS_DOUBLE) + { + rtx nextbody = PATTERN (next); + rtx ldr_target; /* destination of ldr insn: rN */ + rtx arm_part; /* src or dest espression involving [rN] */ + unsigned int arm_regno; /* the arm reg in the [rN] part */ - attr = get_attr_cirrus (first); + ldr_target = XEXP (body, 0); + gcc_assert (GET_CODE (ldr_target) == REG); - /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...) - must be followed by a non-coprocessor instruction. */ - if (attr == CIRRUS_COMPARE) - { - nops = 0; + gcc_assert (GET_CODE (nextbody) == SET); - t = next_nonnote_insn (first); + /* Find the load or store address of the insn */ + switch (GET_CODE (XEXP (nextbody, 0))) + { + case MEM: /* it's cfstrd/64 */ + gcc_assert (GET_CODE (XEXP (nextbody, 1)) == REG); + arm_part = XEXP (XEXP (nextbody, 0), 0); + break; + + case REG: /* it's cfldrd/64 */ + if (GET_CODE (XEXP (nextbody, 1)) == MEM) + arm_part = XEXP (XEXP (nextbody, 1), 0); + else + /* It can also be const_double or const_int, which will + * turn into harmless [pc, #offset] in arm_reorg() */ + continue; + break; - if (arm_cirrus_insn_p (t)) - ++ nops; + default: + gcc_unreachable (); + } - if (arm_cirrus_insn_p (next_nonnote_insn (t))) - ++ nops; + /* Find the arm register number in the [rN] expression */ + arm_regno = 0; /* none */ + switch (GET_CODE (arm_part)) + { + case REG: /* it's [rN] */ + arm_regno = REGNO (arm_part); + break; + + case PLUS: /* it's [rN, #XXX] or [rN, -#YYY]. */ + case PRE_INC: + case POST_INC: + case PRE_DEC: + case POST_DEC: + gcc_assert (GET_CODE (XEXP (arm_part, 0)) == REG); + arm_regno = REGNO (XEXP (arm_part, 0)); + break; + + default: + /* Do nothing */ + continue; + } - while (nops --) - emit_insn_after (gen_nop (), first); + if (arm_regno == REGNO (ldr_target)) + emit_insn_after (gen_nop (), insn); + } + } + break; - return; - } + default: + break; + } } /* Return TRUE if X references a SYMBOL_REF. */ @@ -8227,6 +8226,10 @@ minipool_fix_head = minipool_fix_tail = NULL; + /* Do cirrus_reorg() first as it may insert extra instructions */ + if (TARGET_MAVERICK && TARGET_HARD_FLOAT) + cirrus_reorg (); + /* The first insn must always be a note, or the code below won't scan it properly. */ insn = get_insns (); @@ -8236,12 +8239,6 @@ /* Scan all the insns and record the operands that will need fixing. */ for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn)) { - if (TARGET_CIRRUS_FIX_INVALID_INSNS - && (arm_cirrus_insn_p (insn) - || GET_CODE (insn) == JUMP_INSN - || arm_memory_load_p (insn))) - cirrus_reorg (insn); - if (GET_CODE (insn) == BARRIER) push_minipool_barrier (insn, address); else if (INSN_P (insn)) Index: gcc-4.2.4/gcc/config/arm/arm.opt =================================================================== --- gcc-4.2.4.orig/gcc/config/arm/arm.opt 2009-08-09 16:06:46.000000000 +0100 +++ gcc-4.2.4/gcc/config/arm/arm.opt 2009-08-09 16:07:39.000000000 +0100 @@ -63,10 +63,6 @@ Target Report Mask(CALLER_INTERWORKING) Thumb: Assume function pointers may go to non-Thumb aware code -mcirrus-fix-invalid-insns -Target Report Mask(CIRRUS_FIX_INVALID_INSNS) -Cirrus: Place NOPs to avoid invalid instruction combinations - mcpu= Target RejectNegative Joined Specify the name of the target CPU Index: gcc-4.2.4/gcc/doc/invoke.texi =================================================================== --- gcc-4.2.4.orig/gcc/doc/invoke.texi 2009-08-09 16:06:45.000000000 +0100 +++ gcc-4.2.4/gcc/doc/invoke.texi 2009-08-09 16:07:39.000000000 +0100 @@ -417,7 +417,6 @@ -msingle-pic-base -mno-single-pic-base @gol -mpic-register=@var{reg} @gol -mnop-fun-dllimport @gol --mcirrus-fix-invalid-insns -mno-cirrus-fix-invalid-insns @gol -mieee @gol -mpoke-function-name @gol -mthumb -marm @gol @@ -7771,18 +7770,6 @@ Specify the register to be used for PIC addressing. The default is R10 unless stack-checking is enabled, when R9 is used. -@item -mcirrus-fix-invalid-insns -@opindex mcirrus-fix-invalid-insns -@opindex mno-cirrus-fix-invalid-insns -Insert NOPs into the instruction stream to in order to work around -problems with invalid Maverick instruction combinations. This option -is only valid if the @option{-mcpu=ep9312} option has been used to -enable generation of instructions for the Cirrus Maverick floating -point co-processor. This option is not enabled by default, since the -problem is only present in older Maverick implementations. The default -can be re-enabled by use of the @option{-mno-cirrus-fix-invalid-insns} -switch. - @item -mieee When compiling for the Maverick FPU, disable the instructions that fail to honor denormalized values. As these include floating point add, sub,