diff -Nru gcc-3.4.4.ori/gcc/config/arm/arm.c gcc-3.4.4.patched/gcc/config/arm/arm.c
--- gcc-3.4.4.ori/gcc/config/arm/arm.c	2005-02-01 16:07:02.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/arm.c	2005-05-30 23:46:39.000000000 +0200
@@ -4,6 +4,7 @@
    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
    and Martin Simmons (@harleqn.co.uk).
    More major hacks by Richard Earnshaw (rearnsha@arm.com).
+   Cirrus Crunch fixes by Vladimir Ivanov (vladitx@nucleusys.com)
 
 This file is part of GCC.
 
@@ -126,9 +127,15 @@
 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
 static bool arm_rtx_costs (rtx, int, int, int *);
 static int arm_address_cost (rtx);
-static bool arm_memory_load_p (rtx);
 static bool arm_cirrus_insn_p (rtx);
-static void cirrus_reorg (rtx);
+static bool arm_mem_access_p (rtx);
+static bool cirrus_dest_regn_p (rtx, int);
+static rtx cirrus_prev_next_mach_insn (rtx, int *, int);
+static rtx cirrus_prev_mach_insn (rtx, int *);
+static rtx cirrus_next_mach_insn (rtx, int *);
+static void cirrus_reorg_branch (rtx);
+static void cirrus_reorg_bug1 (rtx);
+static void cirrus_reorg_bug10_12 (rtx);
 static void arm_init_builtins (void);
 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 static void arm_init_iwmmxt_builtins (void);
@@ -280,7 +287,7 @@
 #define FL_STRONG     (1 << 8)        /* StrongARM */
 #define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
 #define FL_XSCALE     (1 << 10)       /* XScale */
-#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
+#define FL_CIRRUS     (1 << 11)       /* Cirrus Crunch coprocessor.  */
 #define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
 #define FL_ARCH6J     (1 << 12)       /* Architecture rel 6.  Adds
                                          media instructions.  */
@@ -329,8 +336,8 @@
 /* Nonzero if this chip is an ARM6 or an ARM7.  */
 int arm_is_6_or_7 = 0;
 
-/* Nonzero if this chip is a Cirrus/DSP.  */
-int arm_is_cirrus = 0;
+/* Nonzero if this chip supports Cirrus Crunch coprocessor.  */
+int arm_arch_cirrus = 0;
 
 /* Nonzero if generating Thumb instructions.  */
 int thumb_code = 0;
@@ -763,19 +770,17 @@
   arm_is_6_or_7 = (((tune_flags & (FL_MODE26 | FL_MODE32))
                     && !(tune_flags & FL_ARCH4))) != 0;
   arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
-  arm_is_cirrus = (tune_flags & FL_CIRRUS) != 0;
+  arm_arch_cirrus = (tune_flags & FL_CIRRUS) != 0;
   arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
 
   if (TARGET_IWMMXT && (! TARGET_ATPCS))
     target_flags |= ARM_FLAG_ATPCS;
 
-  if (arm_is_cirrus)
+  if (arm_arch_cirrus)
     {
+      /* Cirrus Crunch coprocessor still requires soft-float division.  */
       arm_fpu_tune = FPUTYPE_MAVERICK;
-
-      /* Ignore -mhard-float if -mcpu=ep9312.  */
-      if (TARGET_HARD_FLOAT)
-        target_flags ^= ARM_FLAG_SOFT_FLOAT;
+      target_flags |= ARM_FLAG_SOFT_FLOAT;
     }
   else
     /* Default value for floating point code... if no co-processor
@@ -1117,6 +1122,12 @@
     if (regs_ever_live[regno] && !call_used_regs[regno])
       return 0;
 
+  if (TARGET_CIRRUS)
+    for (regno = FIRST_CIRRUS_FP_REGNUM; regno <= LAST_CIRRUS_FP_REGNUM; regno++)
+      if (regs_ever_live[regno] && !call_used_regs[regno])
+        return 0;
+
+
   if (TARGET_REALLY_IWMMXT)
     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (regs_ever_live[regno] && ! call_used_regs [regno])
        return 0;
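
Note: MaverickCrunch has no floating-point divide instruction, which is
why the hunk above stops toggling ARM_FLAG_SOFT_FLOAT on -mhard-float and
simply forces it on.  With TARGET_ANY_HARD_FLOAT still covering
TARGET_CIRRUS (see arm.h below), ordinary arithmetic stays on the
coprocessor while division goes through libgcc.  As a rough illustration
(hypothetical user code, not part of the patch):

    double
    scale (double num, double den)
    {
      return num / den;   /* emitted as a call to __divdf3 */
    }

Built with -mcpu=ep9312, that call resolves to the divdf3 routine in the
new ieee754-df-crunch.S file further below, which hands its result back
in mvd0.
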
@@ -4066,41 +4077,6 @@
           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
 }
 
-/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
-   Use by the Cirrus Maverick code which has to workaround
-   a hardware bug triggered by such instructions.  */
-static bool
-arm_memory_load_p (rtx insn)
-{
-  rtx body, lhs, rhs;
-
-  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
-    return false;
-
-  body = PATTERN (insn);
-
-  if (GET_CODE (body) != SET)
-    return false;
-
-  lhs = XEXP (body, 0);
-  rhs = XEXP (body, 1);
-
-  lhs = REG_OR_SUBREG_RTX (lhs);
-
-  /* If the destination is not a general purpose
-     register we do not have to worry.  */
-  if (GET_CODE (lhs) != REG
-      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
-    return false;
-
-  /* As well as loads from memory we also have to react
-     to loads of invalid constants which will be turned
-     into loads from the minipool.  */
-  return (GET_CODE (rhs) == MEM
-          || GET_CODE (rhs) == SYMBOL_REF
-          || note_invalid_constants (insn, -1, false));
-}
-
 /* Return TRUE if INSN is a Cirrus instruction.  */
 static bool
 arm_cirrus_insn_p (rtx insn)
@@ -4119,123 +4095,209 @@
   return attr != CIRRUS_NOT;
 }
 
-/* Cirrus reorg for invalid instruction combinations.  */
-static void
-cirrus_reorg (rtx first)
+/* Return TRUE if INSN accesses memory.  */
+static bool
+arm_mem_access_p (rtx insn)
 {
-  enum attr_cirrus attr;
-  rtx body = PATTERN (first);
-  rtx t;
-  int nops;
+  enum attr_type attr;
 
-  /* Any branch must be followed by 2 non Cirrus instructions.  */
-  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
-    {
-      nops = 0;
-      t = next_nonnote_insn (first);
+  /* get_attr aborts on USE and CLOBBER.  */
+  if (!insn
+      || GET_CODE (insn) != INSN
+      || GET_CODE (PATTERN (insn)) == USE
+      || GET_CODE (PATTERN (insn)) == CLOBBER)
+    return 0;
 
-      if (arm_cirrus_insn_p (t))
-        ++ nops;
+  attr = get_attr_type (insn);
 
-      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
-        ++ nops;
+  return attr == TYPE_LOAD
+         || attr == TYPE_STORE1
+         || attr == TYPE_STORE2
+         || attr == TYPE_STORE3
+         || attr == TYPE_STORE4;
+}
 
-      while (nops --)
-        emit_insn_after (gen_nop (), first);
+/* Return TRUE if the destination of BODY is Cirrus register REGN.  */
+static bool
+cirrus_dest_regn_p (rtx body, int regn)
+{
+  rtx lhs;
+  int reg;
 
-      return;
-    }
+  lhs = XEXP (body, 0);
+  if (GET_CODE (lhs) != REG)
+    return 0;
 
-  /* (float (blah)) is in parallel with a clobber.  */
-  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
-    body = XVECEXP (body, 0, 0);
+  reg = REGNO (lhs);
+  if (REGNO_REG_CLASS (reg) != CIRRUS_REGS)
+    return 0;
 
-  if (GET_CODE (body) == SET)
-    {
-      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
+  return reg == regn;
+}
 
-      /* cfldrd, cfldr64, cfstrd, cfstr64 must
-         be followed by a non Cirrus insn.  */
-      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
-        {
-          if (arm_cirrus_insn_p (next_nonnote_insn (first)))
-            emit_insn_after (gen_nop (), first);
+/* Get previous/next machine instruction during Cirrus workaround scans.
+   Assume worst case (for the purpose of Cirrus workarounds)
+   for JUMP / CALL instructions.  */
+static rtx
+cirrus_prev_next_mach_insn (rtx insn, int *len, int next)
+{
+  rtx t;
+  int l = 0;
 
-          return;
-        }
+  /* It seems that we can count only on INSN length.  */
+  for ( ; ; )
+    {
+      if (next)
+        insn = NEXT_INSN (insn);
+      else
+        insn = PREV_INSN (insn);
+      if (!insn)
+        break;
+      if (GET_CODE (insn) == INSN)
+        {
+          l = get_attr_length (insn) / 4;
+          if (l)
+            break;
+        }
+      else if (GET_CODE (insn) == JUMP_INSN)
+        {
+          l = 1;
+          t = is_jump_table (insn);
+          if (t)
+            l += get_jump_table_size (t) / 4;
+          break;
-        }
-      else if (arm_memory_load_p (first))
+        }
+      else if (GET_CODE (insn) == CALL_INSN)
         {
-          unsigned int arm_regno;
-
-          /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
-             ldr/cfmv64hr combination where the Rd field is the same
-             in both instructions must be split with a non Cirrus
-             insn.  Example:
-
-             ldr r0, blah
-             nop
-             cfmvsr mvf0, r0.  */
-
-          /* Get Arm register number for ldr insn.  */
-          if (GET_CODE (lhs) == REG)
-            arm_regno = REGNO (lhs);
-          else if (GET_CODE (rhs) == REG)
-            arm_regno = REGNO (rhs);
-          else
-            abort ();
-
-          /* Next insn.  */
-          first = next_nonnote_insn (first);
-
-          if (! arm_cirrus_insn_p (first))
-            return;
-
-          body = PATTERN (first);
+          l = 1;
+          break;
+        }
+    }
 
-          /* (float (blah)) is in parallel with a clobber.  */
-          if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
-            body = XVECEXP (body, 0, 0);
+  if (len)
+    *len = l;
+  return insn;
+}
 
-          if (GET_CODE (body) == FLOAT)
-            body = XEXP (body, 0);
-
-          if (get_attr_cirrus (first) == CIRRUS_MOVE
-              && GET_CODE (XEXP (body, 1)) == REG
-              && arm_regno == REGNO (XEXP (body, 1)))
-            emit_insn_after (gen_nop (), first);
+static rtx
+cirrus_prev_mach_insn (rtx insn, int *len)
+{
+  return cirrus_prev_next_mach_insn (insn, len, 0);
+}
 
-          return;
-        }
-    }
+static rtx
+cirrus_next_mach_insn (rtx insn, int *len)
+{
+  return cirrus_prev_next_mach_insn (insn, len, 1);
+}
 
-  /* get_attr aborts on USE and CLOBBER.  */
-  if (!first
-      || GET_CODE (first) != INSN
-      || GET_CODE (PATTERN (first)) == USE
-      || GET_CODE (PATTERN (first)) == CLOBBER)
+/* Cirrus reorg for branch slots.  */
+static void
+cirrus_reorg_branch (rtx insn)
+{
+  rtx t;
+  int nops, l;
+
+  /* TODO: handle jump-tables.  */
+  t = is_jump_table (insn);
+  if (t)
     return;
 
-  attr = get_attr_cirrus (first);
-
-  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
-     must be followed by a non-coprocessor instruction.  */
-  if (attr == CIRRUS_COMPARE)
+  /* Any branch must be followed by 2 non Cirrus instructions.  */
+  t = insn;
+  for (nops = 2; nops > 0; )
     {
-      nops = 0;
-
-      t = next_nonnote_insn (first);
-
-      if (arm_cirrus_insn_p (t))
-        ++ nops;
-
-      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
-        ++ nops;
-
-      while (nops --)
-        emit_insn_after (gen_nop (), first);
-
-      return;
+      if (!cirrus_next_mach_insn (t, 0))
+        {
+          insn = t;
+          break;
+        }
+      t = cirrus_next_mach_insn (t, &l);
+      if (arm_cirrus_insn_p (t))
+        break;
+      nops -= l;
     }
+
+  while (nops-- > 0)
+    emit_insn_after (gen_nop (), insn);
+}
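
Note: the helpers above count instruction lengths in 4-byte words
(get_attr_length / 4), so a single multi-word insn or an inline jump
table can fill more than one slot at once.  A minimal standalone model
of the policy cirrus_reorg_branch implements, using hypothetical arrays
len[] (lengths in words) and is_cirrus[] (coprocessor flags), for
illustration only:

    /* Return how many nops must follow the branch at index BRANCH so
       that two non-Cirrus words come after it; a Cirrus insn may not
       fill a slot, so remaining slots become nops.  */
    static int
    nops_after_branch (const int *len, const int *is_cirrus, int n, int branch)
    {
      int nops = 2;
      int i;

      for (i = branch + 1; i < n && nops > 0; i++)
        {
          if (is_cirrus[i])
            break;
          nops -= len[i];
        }
      return nops > 0 ? nops : 0;
    }
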
+/* Cirrus reorg for bug #1 (cirrus + cfcmpxx).  */
+static void
+cirrus_reorg_bug1 (rtx insn)
+{
+  rtx body = PATTERN (insn), body2;
+  rtx t;
+  int i, nops, l;
+  enum attr_cirrus attr;
+
+  /* Check if the destination or clobber is a Cirrus register.  */
+  if (GET_CODE (body) == PARALLEL)
+    {
+      for (i = 0; i < XVECLEN (body, 0); i++)
+        {
+          body2 = XVECEXP (body, 0, i);
+          if (GET_CODE (body2) == SET)
+            {
+              if (cirrus_dest_regn_p (body2, LAST_CIRRUS_FP_REGNUM))
+                {
+                  nops = 5;
+                  goto fix;
+                }
+            }
+          else if (GET_CODE (body2) == CLOBBER)
+            {
+              if (cirrus_dest_regn_p (body2, LAST_CIRRUS_FP_REGNUM))
+                {
+                  nops = 4;
+                  goto fix;
+                }
+            }
+        }
+    }
+  else if (GET_CODE (body) == SET)
+    {
+      if (cirrus_dest_regn_p (body, LAST_CIRRUS_FP_REGNUM))
+        {
+          nops = 5;
+          goto fix;
+        }
+    }
+
+  return;
+
+fix:
+  t = insn;
+  for ( ; nops > 0; )
+    {
+      t = cirrus_next_mach_insn (t, &l);
+      if (!t)
+        break;
+      if (GET_CODE (t) == JUMP_INSN
+          || GET_CODE (t) == CALL_INSN)
+        {
+          nops -= l;
+          break;
+        }
+      else if (arm_cirrus_insn_p (t))
+        {
+          attr = get_attr_cirrus (t);
+          if (attr == CIRRUS_COMPARE)
+            break;
+        }
+      nops -= l;
    }
+
+  while (nops-- > 0)
+    emit_insn_after (gen_nop (), insn);
+}
+
+/* Cirrus reorg for bugs #10 and #12 (data aborts).  */
+static void
+cirrus_reorg_bug10_12 (rtx insn)
+{
+  rtx t;
+
+  t = cirrus_next_mach_insn (insn, 0);
+  if (arm_cirrus_insn_p (t))
+    if (TARGET_CIRRUS_D0
+        || get_attr_cirrus (t) == CIRRUS_DOUBLE)
+      emit_insn_after (gen_nop (), insn);
 }
 
 /* Return nonzero if OP is a constant power of two.  */
@@ -4586,7 +4648,7 @@
       if (GET_CODE (XEXP (a, 0)) == PLUS)
         {
           reg0 = XEXP (XEXP (a, 0), 0);
-          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+          val0 = INTVAL (XEXP (XEXP (a, 0), 0));
         }
       else
         reg0 = XEXP (a, 0);
@@ -6684,7 +6746,7 @@
 {
   Mnode * mp;
   Mnode * nmp;
-  int align64 = 0;
+  int align64 = 0, stuffnop = 0;
 
   if (TARGET_REALLY_IWMMXT)
     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
@@ -6699,8 +6761,27 @@
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address,
             align64 ? 8 : 4);
 
+  /* Check if the branch before the minipool is already stuffed with nops.  */
+  if (TARGET_CIRRUS_D0 || TARGET_CIRRUS_D1)
+    {
+      rtx t;
+
+      t = prev_active_insn (scan);
+      if (GET_CODE (t) != INSN
+          || PATTERN (t) != const0_rtx)
+        stuffnop = 1;
+    }
+
   scan = emit_label_after (gen_label_rtx (), scan);
   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
+
+  /* The last instruction was a branch, so emit two non-Cirrus opcodes.  */
+  if (stuffnop)
+    {
+      emit_insn_before (gen_nop (), scan);
+      emit_insn_before (gen_nop (), scan);
+    }
+
   scan = emit_label_after (minipool_vector_label, scan);
 
   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
@@ -7048,15 +7129,38 @@
   if (GET_CODE (insn) != NOTE)
     abort ();
 
+  /* Scan all the insns and fix Cirrus issues.  */
+  if (TARGET_CIRRUS_D0 || TARGET_CIRRUS_D1)
+    {
+      rtx t, s;
+
+      for (t = cirrus_next_mach_insn (insn, 0); t;
+           t = cirrus_next_mach_insn (t, 0))
+        if (arm_mem_access_p (t))
+          cirrus_reorg_bug10_12 (t);
+
+      if (TARGET_CIRRUS_D0)
+        for (t = cirrus_next_mach_insn (insn, 0); t;
+             t = cirrus_next_mach_insn (t, 0))
+          if (arm_cirrus_insn_p (t))
+            cirrus_reorg_bug1 (t);
+
+      /* Find the last insn.  */
+      for (t = insn; ; t = s)
+        {
+          s = cirrus_next_mach_insn (t, 0);
+          if (!s)
+            break;
+        }
+
+      /* Scan backward and fix branches.  */
+      for ( ; t; t = cirrus_prev_mach_insn (t, 0))
+        if (GET_CODE (t) == JUMP_INSN
+            || GET_CODE (t) == CALL_INSN)
+          cirrus_reorg_branch (t);
+    }
+
   /* Scan all the insns and record the operands that will need fixing.  */
   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
     {
-      if (TARGET_CIRRUS_FIX_INVALID_INSNS
-          && (arm_cirrus_insn_p (insn)
-              || GET_CODE (insn) == JUMP_INSN
-              || arm_memory_load_p (insn)))
-        cirrus_reorg (insn);
-
       if (GET_CODE (insn) == BARRIER)
         push_minipool_barrier (insn, address);
       else if (INSN_P (insn))
@@ -8600,7 +8704,19 @@
     {
       int vfp_offset = 4;
 
-      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+      if (TARGET_CIRRUS)
+        {
+          for (reg = FIRST_CIRRUS_FP_REGNUM; reg <= LAST_CIRRUS_FP_REGNUM; reg++)
+            if (regs_ever_live[reg] && !call_used_regs[reg])
+              {
+                floats_offset += 8;
+                if (TARGET_CIRRUS_D0 || TARGET_CIRRUS_D1)
+                  asm_fprintf (f, "\tnop\n");
+                asm_fprintf (f, "\tcfldrd\tmvd%d, [%r, #-%d]\n",
+                             reg - FIRST_CIRRUS_FP_REGNUM, FP_REGNUM,
+                             floats_offset - vfp_offset);
+              }
+        }
+      else if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
         {
           for (reg = LAST_ARM_FP_REGNUM; reg >= FIRST_ARM_FP_REGNUM; reg--)
             if (regs_ever_live[reg] && !call_used_regs[reg])
@@ -8717,7 +8833,18 @@
           output_add_immediate (operands);
         }
 
-      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+      if (TARGET_CIRRUS)
+        {
+          for (reg = LAST_CIRRUS_FP_REGNUM; reg >= FIRST_CIRRUS_FP_REGNUM; reg--)
+            if (regs_ever_live[reg] && !call_used_regs[reg])
+              {
+                if (TARGET_CIRRUS_D0 || TARGET_CIRRUS_D1)
+                  asm_fprintf (f, "\tnop\n");
+                asm_fprintf (f, "\tcfldrd\tmvd%u, [%r], #8\n",
+                             reg - FIRST_CIRRUS_FP_REGNUM, SP_REGNUM);
+              }
+        }
+      else if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
         {
           for (reg = FIRST_ARM_FP_REGNUM; reg <= LAST_ARM_FP_REGNUM; reg++)
             if (regs_ever_live[reg] && !call_used_regs[reg])
@@ -9139,9 +9266,18 @@
   /* If the hard floating point registers are going to be used
      then they must be saved on the stack as well.
      Each register occupies 12 bytes of stack space.  */
-  for (reg = FIRST_ARM_FP_REGNUM; reg <= LAST_ARM_FP_REGNUM; reg++)
-    if (regs_ever_live[reg] && ! call_used_regs[reg])
-      call_saved_registers += 12;
+  if (TARGET_CIRRUS)
+    {
+      for (reg = FIRST_CIRRUS_FP_REGNUM; reg <= LAST_CIRRUS_FP_REGNUM; reg++)
+        if (regs_ever_live[reg] && !call_used_regs[reg])
+          call_saved_registers += 8;
+    }
+  else
+    {
+      for (reg = FIRST_ARM_FP_REGNUM; reg <= LAST_ARM_FP_REGNUM; reg++)
+        if (regs_ever_live[reg] && ! call_used_regs[reg])
+          call_saved_registers += 12;
+    }
 
   if (TARGET_REALLY_IWMMXT)
     /* Check for the call-saved iWMMXt registers.  */
@@ -9284,9 +9420,18 @@
   /* Space for saved FPA registers.  */
   if (! IS_VOLATILE (func_type))
     {
-      for (regno = FIRST_ARM_FP_REGNUM; regno <= LAST_ARM_FP_REGNUM; regno++)
-        if (regs_ever_live[regno] && ! call_used_regs[regno])
-          entry_size += 12;
+      if (TARGET_CIRRUS)
+        {
+          for (regno = FIRST_CIRRUS_FP_REGNUM; regno <= LAST_CIRRUS_FP_REGNUM; regno++)
+            if (regs_ever_live[regno] && !call_used_regs[regno])
+              entry_size += 8;
+        }
+      else
+        {
+          for (regno = FIRST_ARM_FP_REGNUM; regno <= LAST_ARM_FP_REGNUM; regno++)
+            if (regs_ever_live[regno] && ! call_used_regs[regno])
+              entry_size += 12;
+        }
     }
 
   if (TARGET_REALLY_IWMMXT)
@@ -9483,7 +9628,19 @@
     {
       /* Save any floating point call-saved registers used by this
         function.  */
-      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+      if (TARGET_CIRRUS)
+        {
+          for (reg = FIRST_CIRRUS_FP_REGNUM; reg <= LAST_CIRRUS_FP_REGNUM; reg++)
+            if (regs_ever_live[reg] && !call_used_regs[reg])
+              {
+                insn = gen_rtx_PRE_DEC (DFmode, stack_pointer_rtx);
+                insn = gen_rtx_MEM (DFmode, insn);
+                insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
+                                               gen_rtx_REG (DFmode, reg)));
+                RTX_FRAME_RELATED_P (insn) = 1;
+              }
+        }
+      else if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
         {
           for (reg = LAST_ARM_FP_REGNUM; reg >= FIRST_ARM_FP_REGNUM; reg--)
             if (regs_ever_live[reg] && !call_used_regs[reg])
@@ -10033,16 +10190,16 @@
     {
     case GE: return ARM_GE;
     case GT: return ARM_GT;
-    case LE: return ARM_LS;
-    case LT: return ARM_MI;
+    case LE: return (TARGET_CIRRUS) ? ARM_LE : ARM_LS;
+    case LT: return (TARGET_CIRRUS) ? ARM_LT : ARM_MI;
     case NE: return ARM_NE;
    case EQ: return ARM_EQ;
     case ORDERED: return ARM_VC;
     case UNORDERED: return ARM_VS;
     case UNLT: return ARM_LT;
     case UNLE: return ARM_LE;
-    case UNGT: return ARM_HI;
-    case UNGE: return ARM_PL;
+    case UNGT: return (TARGET_CIRRUS) ? ARM_GT : ARM_HI;
+    case UNGE: return (TARGET_CIRRUS) ? ARM_GE : ARM_PL;
       /* UNEQ and LTGT do not have a representation.  */
     case UNEQ: /* Fall through.  */
     case LTGT: /* Fall through.  */
@@ -10360,16 +10517,10 @@
                    || get_attr_conds (this_insn) != CONDS_NOCOND)
                  fail = TRUE;
 
-                /* A conditional cirrus instruction must be followed by
-                   a non Cirrus instruction.  However, since we
-                   conditionalize instructions in this function and by
-                   the time we get here we can't add instructions
-                   (nops), because shorten_branches() has already been
-                   called, we will disable conditionalizing Cirrus
-                   instructions to be safe.  */
-                if (GET_CODE (scanbody) != USE
-                    && GET_CODE (scanbody) != CLOBBER
-                    && get_attr_cirrus (this_insn) != CIRRUS_NOT)
+                /* To avoid erratic behaviour, we do not conditionalize
+                   Cirrus instructions when the workarounds are enabled.  */
+                if (arm_cirrus_insn_p (this_insn)
+                    && (TARGET_CIRRUS_D0 || TARGET_CIRRUS_D1))
                  fail = TRUE;
 
                break;
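
Note on the condition-code hunk above: the Crunch cfcmps/cfcmpd/cfcmp64
instructions set the flags like a signed integer comparison, whereas the
old FPA mapping relied on the unsigned-style LS/MI/HI/PL conditions.
For instance (hypothetical function, not part of the patch):

    /* Under -mcpu=ep9312 this compare is a cfcmpd, and the result must
       be tested with the signed LT condition rather than MI.  */
    int
    less_than (double a, double b)
    {
      return a < b;
    }
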
diff -Nru gcc-3.4.4.ori/gcc/config/arm/arm.h gcc-3.4.4.patched/gcc/config/arm/arm.h
--- gcc-3.4.4.ori/gcc/config/arm/arm.h	2004-08-25 17:55:55.000000000 +0200
+++ gcc-3.4.4.patched/gcc/config/arm/arm.h	2005-05-30 23:46:26.000000000 +0200
@@ -5,6 +5,7 @@
    and Martin Simmons (@harleqn.co.uk).
    More major hacks by Richard Earnshaw (rearnsha@arm.com)
    Minor hacks by Nick Clifton (nickc@cygnus.com)
+   Cirrus Crunch fixes by Vladimir Ivanov (vladitx@nucleusys.com)
 
 This file is part of GCC.
 
@@ -199,7 +200,9 @@
 %{msoft-float:%{mhard-float: \
	%e-msoft-float and -mhard_float may not be used together}} \
 %{mbig-endian:%{mlittle-endian: \
-	%e-mbig-endian and -mlittle-endian may not be used together}}"
+	%e-mbig-endian and -mlittle-endian may not be used together}} \
+%{mfix-crunch-d0:%{mfix-crunch-d1: \
+	%e-mfix-crunch-d0 and -mfix-crunch-d1 may not be used together}}"
 
 /* Set the architecture define -- if -march= is set, then it overrides
    the -mcpu= setting.  */
@@ -403,8 +406,10 @@
 /* Nonzero means to use ARM/Thumb Procedure Call Standard conventions.  */
 #define ARM_FLAG_ATPCS		(1 << 22)
 
-/* Fix invalid Cirrus instruction combinations by inserting NOPs.  */
-#define CIRRUS_FIX_INVALID_INSNS (1 << 23)
+/* Nonzero means target uses Cirrus Crunch coprocessor revision D0.  */
+#define ARM_FLAG_CIRRUS_D0	(1 << 23)
+/* Nonzero means target uses Cirrus Crunch coprocessor revision D1.  */
+#define ARM_FLAG_CIRRUS_D1	(1 << 25)
 
 #define TARGET_APCS_FRAME	(target_flags & ARM_FLAG_APCS_FRAME)
 #define TARGET_POKE_FUNCTION_NAME (target_flags & ARM_FLAG_POKE)
@@ -417,7 +422,9 @@
 #define TARGET_MMU_TRAPS	(target_flags & ARM_FLAG_MMU_TRAPS)
 #define TARGET_SOFT_FLOAT	(target_flags & ARM_FLAG_SOFT_FLOAT)
 #define TARGET_HARD_FLOAT	(! TARGET_SOFT_FLOAT)
-#define TARGET_CIRRUS		(arm_is_cirrus)
+#define TARGET_CIRRUS		(arm_arch_cirrus)
+#define TARGET_CIRRUS_D0	(target_flags & ARM_FLAG_CIRRUS_D0)
+#define TARGET_CIRRUS_D1	(target_flags & ARM_FLAG_CIRRUS_D1)
 #define TARGET_ANY_HARD_FLOAT	(TARGET_HARD_FLOAT || TARGET_CIRRUS)
 #define TARGET_IWMMXT		(arm_arch_iwmmxt)
 #define TARGET_REALLY_IWMMXT	(TARGET_IWMMXT && TARGET_ARM)
@@ -511,10 +518,10 @@
    N_("Thumb: Assume function pointers may go to non-Thumb aware code") }, \
   {"no-caller-super-interworking", -THUMB_FLAG_CALLER_SUPER_INTERWORKING, \
    "" },					\
-  {"cirrus-fix-invalid-insns", CIRRUS_FIX_INVALID_INSNS, \
-   N_("Cirrus: Place NOPs to avoid invalid instruction combinations") }, \
-  {"no-cirrus-fix-invalid-insns", -CIRRUS_FIX_INVALID_INSNS, \
-   N_("Cirrus: Do not break up invalid instruction combinations with NOPs") },\
+  {"fix-crunch-d0", ARM_FLAG_CIRRUS_D0, \
+   N_("Cirrus: workarounds for Crunch coprocessor revision D0") }, \
+  {"fix-crunch-d1", ARM_FLAG_CIRRUS_D1, \
+   N_("Cirrus: workarounds for Crunch coprocessor revision D1") }, \
   SUBTARGET_SWITCHES \
   {"", TARGET_DEFAULT, "" } \
 }
@@ -631,7 +638,7 @@
 extern int arm_is_strong;
 
 /* Nonzero if this chip is a Cirrus variant.  */
-extern int arm_is_cirrus;
+extern int arm_arch_cirrus;
 
 /* Nonzero if this chip supports Intel XScale with Wireless MMX technology.  */
 extern int arm_arch_iwmmxt;
diff -Nru gcc-3.4.4.ori/gcc/config/arm/arm.md gcc-3.4.4.patched/gcc/config/arm/arm.md
--- gcc-3.4.4.ori/gcc/config/arm/arm.md	2005-01-25 13:50:34.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/arm.md	2005-05-30 23:46:26.000000000 +0200
@@ -6507,10 +6507,12 @@
 )
 
 ;; Cirrus DI compare instruction
+;; This is disabled and left to go through the ARM core registers, because
+;; the Crunch coprocessor currently does only signed comparisons.
 (define_expand "cmpdi"
   [(match_operand:DI 0 "cirrus_fp_register" "")
    (match_operand:DI 1 "cirrus_fp_register" "")]
-  "TARGET_ARM && TARGET_CIRRUS"
+  "TARGET_ARM && TARGET_CIRRUS && 0"
   "{
      arm_compare_op0 = operands[0];
     arm_compare_op1 = operands[1];
@@ -6521,7 +6523,7 @@
   [(set (reg:CC CC_REGNUM)
	(compare:CC (match_operand:DI 0 "cirrus_fp_register"  "v")
		    (match_operand:DI 1 "cirrus_fp_register"  "v")))]
-  "TARGET_ARM && TARGET_CIRRUS"
+  "TARGET_ARM && TARGET_CIRRUS && 0"
   "cfcmp64%?\\tr15, %V0, %V1"
   [(set_attr "type" "mav_farith")
   (set_attr "cirrus" "compare")]
@@ -10046,6 +10048,7 @@
   [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_PROLOGUE_USE)]
   ""
   "%@ %0 needed for prologue"
+  [(set_attr "length" "0")]
 )
 
 ;; Load the FPA co-processor patterns
diff -Nru gcc-3.4.4.ori/gcc/config/arm/cirrus.md gcc-3.4.4.patched/gcc/config/arm/cirrus.md
--- gcc-3.4.4.ori/gcc/config/arm/cirrus.md	2003-03-09 18:07:45.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/cirrus.md	2005-05-30 23:46:26.000000000 +0200
@@ -348,7 +348,8 @@
    (clobber (match_scratch:DF 2 "=v"))]
   "TARGET_ARM && TARGET_CIRRUS"
   "cftruncd32%?\\t%Y2, %V1\;cfmvr64l%?\\t%0, %Z2"
-  [(set_attr "length" "8")]
+  [(set_attr "length" "8")
+   (set_attr "cirrus" "normal")]
 )
 
 (define_insn "*cirrus_truncdfsf2"
diff -Nru gcc-3.4.4.ori/gcc/config/arm/elf.h gcc-3.4.4.patched/gcc/config/arm/elf.h
--- gcc-3.4.4.ori/gcc/config/arm/elf.h	2004-02-24 15:25:22.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/elf.h	2005-05-30 23:46:26.000000000 +0200
@@ -46,7 +46,7 @@
 
 #ifndef SUBTARGET_ASM_FLOAT_SPEC
 #define SUBTARGET_ASM_FLOAT_SPEC "\
-%{mapcs-float:-mfloat} %{msoft-float:-mfpu=softfpa}"
+%{mapcs-float:-mfloat} %{msoft-float:-mfpu=softfpa} %{mcpu=ep9312:-mfpu=maverick}"
 #endif
 
 #ifndef ASM_SPEC
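
Note: the "&& 0" added to the cmpdi patterns above disables DImode
compares on the coprocessor entirely, because cfcmp64 implements only a
signed comparison while DImode compares may be unsigned.  A hypothetical
case that a cfcmp64-based compare would get wrong:

    /* As unsigned, ~0ULL is the largest 64-bit value, so this must
       return 1; viewed as signed it is -1, and a signed compare would
       return 0.  */
    int
    u64_gt_one (unsigned long long a)
    {
      return a > 1ULL;
    }
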
diff -Nru gcc-3.4.4.ori/gcc/config/arm/ieee754-df-crunch.S gcc-3.4.4.patched/gcc/config/arm/ieee754-df-crunch.S
--- gcc-3.4.4.ori/gcc/config/arm/ieee754-df-crunch.S	1970-01-01 01:00:00.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/ieee754-df-crunch.S	2005-05-30 23:46:26.000000000 +0200
@@ -0,0 +1,382 @@
+/* ieee754-df.S double-precision floating point support for ARM / Crunch
+
+   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
+   Contributed by Nicolas Pitre (nico@cam.org)
+   Adapted to Crunch by Vladimir Ivanov (vladitx@nucleusys.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   In addition to the permissions in the GNU General Public License, the
+   Free Software Foundation gives you unlimited permission to link the
+   compiled version of this file into combinations with other programs,
+   and to distribute those combinations without any restriction coming
+   from the use of this file.  (The General Public License restrictions
+   do apply in other respects; for example, they cover modification of
+   the file, and distribution when not linked into a combine
+   executable.)
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible.  This is
+ * not meant to be easy to understand for the casual reader.
+ * For slightly simpler code please see the single precision version
+ * of this file.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+
+@ For MaverickCrunch, float words are always little-endian.
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+
+
+#ifdef L_muldivdf3
+
+ARM_FUNC_START divdf3
+
+	stmfd	sp!, {r4, r5, r6, lr}
+
+	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+	mov	ip, #0xff
+	orr	ip, ip, #0x700
+	ands	r4, ip, xh, lsr #20
+	andnes	r5, ip, yh, lsr #20
+	teqne	r4, ip
+	teqne	r5, ip
+	bleq	LSYM(Ldv_s)
+
+	@ Subtract divisor exponent from dividend's.
+	sub	r4, r4, r5
+
+	@ Preserve final sign into lr.
+	eor	lr, xh, yh
+
+	@ Convert mantissa to unsigned integer.
+	@ Dividend -> r5-r6, divisor -> yh-yl.
+	orrs	r5, yl, yh, lsl #12
+	mov	xh, xh, lsl #12
+	beq	LSYM(Ldv_1)
+	mov	yh, yh, lsl #12
+	mov	r5, #0x10000000
+	orr	yh, r5, yh, lsr #4
+	orr	yh, yh, yl, lsr #24
+	mov	yl, yl, lsl #8
+	orr	r5, r5, xh, lsr #4
+	orr	r5, r5, xl, lsr #24
+	mov	r6, xl, lsl #8
+
+	@ Initialize xh with final sign bit.
+	and	xh, lr, #0x80000000
+
+	@ Ensure result will land to known bit position.
+	@ Apply exponent bias accordingly.
+	cmp	r5, yh
+	cmpeq	r6, yl
+	adc	r4, r4, #(255 - 2)
+	add	r4, r4, #0x300
+	bcs	1f
+	movs	yh, yh, lsr #1
+	mov	yl, yl, rrx
+1:
+	@ Perform first subtraction to align result to a nibble.
+	subs	r6, r6, yl
+	sbc	r5, r5, yh
+	movs	yh, yh, lsr #1
+	mov	yl, yl, rrx
+	mov	xl, #0x00100000
+	mov	ip, #0x00080000
+
+	@ The actual division loop.
+1:	subs	lr, r6, yl
+	sbcs	lr, r5, yh
+	subcs	r6, r6, yl
+	movcs	r5, lr
+	orrcs	xl, xl, ip
+	movs	yh, yh, lsr #1
+	mov	yl, yl, rrx
+	subs	lr, r6, yl
+	sbcs	lr, r5, yh
+	subcs	r6, r6, yl
+	movcs	r5, lr
+	orrcs	xl, xl, ip, lsr #1
+	movs	yh, yh, lsr #1
+	mov	yl, yl, rrx
+	subs	lr, r6, yl
+	sbcs	lr, r5, yh
+	subcs	r6, r6, yl
+	movcs	r5, lr
+	orrcs	xl, xl, ip, lsr #2
+	movs	yh, yh, lsr #1
+	mov	yl, yl, rrx
+	subs	lr, r6, yl
+	sbcs	lr, r5, yh
+	subcs	r6, r6, yl
+	movcs	r5, lr
+	orrcs	xl, xl, ip, lsr #3
+
+	orrs	lr, r5, r6
+	beq	2f
+	mov	r5, r5, lsl #4
+	orr	r5, r5, r6, lsr #28
+	mov	r6, r6, lsl #4
+	mov	yh, yh, lsl #3
+	orr	yh, yh, yl, lsr #29
+	mov	yl, yl, lsl #3
+	movs	ip, ip, lsr #4
+	bne	1b
+
+	@ We are done with a word of the result.
+	@ Loop again for the low word if this pass was for the high word.
+	tst	xh, #0x00100000
+	bne	3f
+	orr	xh, xh, xl
+	mov	xl, #0
+	mov	ip, #0x80000000
+	b	1b
+2:
+	@ Be sure result starts in the high word.
+	tst	xh, #0x00100000
+	orreq	xh, xh, xl
+	moveq	xl, #0
+3:
+	@ Check exponent range for under/overflow.
+	subs	ip, r4, #(254 - 1)
+	cmphi	ip, #0x700
+	bhi	LSYM(Lml_u)
+
+	@ Round the result, merge final exponent.
+	subs	ip, r5, yh
+	subeqs	ip, r6, yl
+	moveqs	ip, xl, lsr #1
+	adcs	xl, xl, #0
+	adc	xh, xh, r4, lsl #20
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	@ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+	and	lr, lr, #0x80000000
+	orr	xh, lr, xh, lsr #12
+	adds	r4, r4, ip, lsr #1
+	rsbgts	r5, r4, ip
+	orrgt	xh, xh, r4, lsl #20
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6" gt
+
+	orr	xh, xh, #0x00100000
+	mov	lr, #0
+	subs	r4, r4, #1
+	b	LSYM(Lml_u)
+
+	@ Result might need to be denormalized: put remainder bits
+	@ in lr for rounding considerations.
+LSYM(Ldv_u):
+	orr	lr, r5, r6
+	b	LSYM(Lml_u)
+
+	@ One or both arguments are either INF, NAN or zero.
+LSYM(Ldv_s):
+	and	r5, ip, yh, lsr #20
+	teq	r4, ip
+	teqeq	r5, ip
+	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
+	teq	r4, ip
+	bne	1f
+	orrs	r4, xl, xh, lsl #12
+	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	teq	r5, ip
+	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	mov	xl, yl
+	mov	xh, yh
+	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+1:	teq	r5, ip
+	bne	2f
+	orrs	r5, yl, yh, lsl #12
+	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	mov	xl, yl
+	mov	xh, yh
+	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+2:	@ If both are non-zero, we need to normalize and resume above.
+	orrs	r6, xl, xh, lsl #1
+	orrnes	r6, yl, yh, lsl #1
+	bne	LSYM(Lml_d)
+	@ One or both arguments are 0.
+	orrs	r4, xl, xh, lsl #1
+	bne	LSYM(Lml_i)		@ <anything> / 0 -> INF
+	orrs	r5, yl, yh, lsl #1
+	bne	LSYM(Lml_z)		@ 0 / <anything> -> 0
+#if 0
+	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+#endif
+
+	@ Return a quiet NAN.
+LSYM(Lml_n):
+	orr	xh, xh, #0x7f000000
+	orr	xh, xh, #0x00f80000
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+LSYM(Lml_u):
+	@ Overflow?
+	bgt	LSYM(Lml_o)
+
+	@ Check if denormalized result is possible, otherwise return signed 0.
+	cmn	r4, #(53 + 1)
+	movle	xl, #0
+	bicle	xh, xh, #0x7fffffff
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6" le
+
+	@ Find out proper shift value.
+	rsb	r4, r4, #0
+	subs	r4, r4, #32
+	bge	2f
+	adds	r4, r4, #12
+	bgt	1f
+
+	@ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
+	add	r4, r4, #20
+	rsb	r5, r4, #32
+	mov	r3, xl, lsl r5
+	mov	xl, xl, lsr r4
+	orr	xl, xl, xh, lsl r5
+	and	r2, xh, #0x80000000
+	bic	xh, xh, #0x80000000
+	adds	xl, xl, r3, lsr #31
+	adc	xh, r2, xh, lsr r4
+	orrs	lr, lr, r3, lsl #1
+	biceq	xl, xl, r3, lsr #31
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
+	@ a register switch from xh to xl.  Then round.
+1:	rsb	r4, r4, #12
+	rsb	r5, r4, #32
+	mov	r3, xl, lsl r4
+	mov	xl, xl, lsr r5
+	orr	xl, xl, xh, lsl r4
+	bic	xh, xh, #0x7fffffff
+	adds	xl, xl, r3, lsr #31
+	adc	xh, xh, #0
+	orrs	lr, lr, r3, lsl #1
+	biceq	xl, xl, r3, lsr #31
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
+	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
+2:	rsb	r5, r4, #32
+	orr	lr, lr, xl, lsl r5
+	mov	r3, xl, lsr r4
+	orr	r3, r3, xh, lsl r5
+	mov	xl, xh, lsr r4
+	bic	xh, xh, #0x7fffffff
+	bic	xl, xl, xh, lsr r4
+	add	xl, xl, r3, lsr #31
+	orrs	lr, lr, r3, lsl #1
+	biceq	xl, xl, r3, lsr #31
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	@ One or both arguments are denormalized.
+	@ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+	teq	r4, #0
+	bne	2f
+	and	r6, xh, #0x80000000
+1:	movs	xl, xl, lsl #1
+	adc	xh, xh, xh
+	tst	xh, #0x00100000
+	subeq	r4, r4, #1
+	beq	1b
+	orr	xh, xh, r6
+	teq	r5, #0
+	movne	pc, lr
+2:	and	r6, yh, #0x80000000
+3:	movs	yl, yl, lsl #1
+	adc	yh, yh, yh
+	tst	yh, #0x00100000
+	subeq	r5, r5, #1
+	beq	3b
+	orr	yh, yh, r6
+	mov	pc, lr
+
+	@ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+	eor	xh, xh, yh
+	bic	xh, xh, #0x7fffffff
+	mov	xl, #0
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	@ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+	eor	xh, xh, yh
+
+	@ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+	and	xh, xh, #0x80000000
+	orr	xh, xh, #0x7f000000
+	orr	xh, xh, #0x00f00000
+	mov	xl, #0
+	cfmvdlr	mvd0, xl
+	cfmvdhr	mvd0, xh
+	RETLDM	"r4, r5, r6"
+
+	FUNC_END divdf3
+
+#endif /* L_muldivdf3 */
+
+#ifdef L_unorddf2
+
+ARM_FUNC_START unorddf2
+	str	lr, [sp, #-4]!
+	mov	ip, #0x7f000000
+	orr	ip, ip, #0x00f00000
+	and	lr, xh, ip
+	teq	lr, ip
+	bne	1f
+	orrs	xl, xl, xh, lsl #12
+	bne	3f			@ x is NAN
+1:	and	lr, yh, ip
+	teq	lr, ip
+	bne	2f
+	orrs	yl, yl, yh, lsl #12
+	bne	3f			@ y is NAN
+2:	mov	r0, #0			@ arguments are ordered.
+	RETLDM
+
+3:	mov	r0, #1			@ arguments are unordered.
+	RETLDM
+
+	FUNC_END unorddf2
+
+#endif /* L_unorddf2 */
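
Note: __unorddf2 above reports whether either operand is a NaN by
checking for an all-ones exponent with a non-zero mantissa.  A C model
of the same test (illustration only; xh/xl are the high/low words of an
IEEE-754 double):

    static int
    df_is_nan (unsigned int xh, unsigned int xl)
    {
      return (xh & 0x7ff00000u) == 0x7ff00000u      /* exponent all ones */
             && ((xh & 0x000fffffu) | xl) != 0;     /* mantissa non-zero */
    }

__unorddf2 returns 1 when df_is_nan holds for either argument, and 0
otherwise.
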
diff -Nru gcc-3.4.4.ori/gcc/config/arm/ieee754-sf-crunch.S gcc-3.4.4.patched/gcc/config/arm/ieee754-sf-crunch.S
--- gcc-3.4.4.ori/gcc/config/arm/ieee754-sf-crunch.S	1970-01-01 01:00:00.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/ieee754-sf-crunch.S	2005-05-30 23:46:26.000000000 +0200
@@ -0,0 +1,246 @@
+/* ieee754-sf.S single-precision floating point support for ARM / Crunch
+
+   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
+   Contributed by Nicolas Pitre (nico@cam.org)
+   Adapted to Crunch by Vladimir Ivanov (vladitx@nucleusys.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   In addition to the permissions in the GNU General Public License, the
+   Free Software Foundation gives you unlimited permission to link the
+   compiled version of this file into combinations with other programs,
+   and to distribute those combinations without any restriction coming
+   from the use of this file.  (The General Public License restrictions
+   do apply in other respects; for example, they cover modification of
+   the file, and distribution when not linked into a combine
+   executable.)
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible.  This is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is intended for best performances.
+ * Exceptions aren't supported yet, but that can be added quite easily
+ * if necessary without impacting performances.
+ */
+
+#ifdef L_muldivsf3
+
+ARM_FUNC_START divsf3
+
+	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+	mov	ip, #0xff
+	ands	r2, ip, r0, lsr #23
+	andnes	r3, ip, r1, lsr #23
+	teqne	r2, ip
+	teqne	r3, ip
+	beq	LSYM(Ldv_s)
+LSYM(Ldv_x):
+
+	@ Subtract divisor exponent from dividend's.
+	sub	r2, r2, r3
+
+	@ Preserve final sign into ip.
+	eor	ip, r0, r1
+
+	@ Convert mantissa to unsigned integer.
+	@ Dividend -> r3, divisor -> r1.
+	movs	r1, r1, lsl #9
+	mov	r0, r0, lsl #9
+	beq	LSYM(Ldv_1)
+	mov	r3, #0x10000000
+	orr	r1, r3, r1, lsr #4
+	orr	r3, r3, r0, lsr #4
+
+	@ Initialize r0 (result) with final sign bit.
+	and	r0, ip, #0x80000000
+
+	@ Ensure result will land to known bit position.
+	@ Apply exponent bias accordingly.
+	cmp	r3, r1
+	movcc	r3, r3, lsl #1
+	adc	r2, r2, #(127 - 2)
+
+	@ The actual division loop.
+	mov	ip, #0x00800000
+1:	cmp	r3, r1
+	subcs	r3, r3, r1
+	orrcs	r0, r0, ip
+	cmp	r3, r1, lsr #1
+	subcs	r3, r3, r1, lsr #1
+	orrcs	r0, r0, ip, lsr #1
+	cmp	r3, r1, lsr #2
+	subcs	r3, r3, r1, lsr #2
+	orrcs	r0, r0, ip, lsr #2
+	cmp	r3, r1, lsr #3
+	subcs	r3, r3, r1, lsr #3
+	orrcs	r0, r0, ip, lsr #3
+	movs	r3, r3, lsl #4
+	movnes	ip, ip, lsr #4
+	bne	1b
+
+	@ Check exponent for under/overflow.
+	cmp	r2, #(254 - 1)
+	bhi	LSYM(Lml_u)
+
+	@ Round the result, merge final exponent.
+	cmp	r3, r1
+	adc	r0, r0, r2, lsl #23
+	biceq	r0, r0, #1
+	cfmvsr	mvf0, r0
+	RET
+
+	@ Division by 0x1p*: let's shortcut a lot of code.
+LSYM(Ldv_1):
+	and	ip, ip, #0x80000000
+	orr	r0, ip, r0, lsr #9
+	adds	r2, r2, #127
+	rsbgts	r3, r2, #255
+	orrgt	r0, r0, r2, lsl #23
+	cfmvsr	mvf0, r0
+	RETc(gt)
+
+	orr	r0, r0, #0x00800000
+	mov	r3, #0
+	subs	r2, r2, #1
+	b	LSYM(Lml_u)
+
+	@ One or both arguments are denormalized.
+	@ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+	teq	r2, #0
+	and	ip, r0, #0x80000000
+1:	moveq	r0, r0, lsl #1
+	tsteq	r0, #0x00800000
+	subeq	r2, r2, #1
+	beq	1b
+	orr	r0, r0, ip
+	teq	r3, #0
+	and	ip, r1, #0x80000000
+2:	moveq	r1, r1, lsl #1
+	tsteq	r1, #0x00800000
+	subeq	r3, r3, #1
+	beq	2b
+	orr	r1, r1, ip
+	b	LSYM(Ldv_x)
+
+	@ One or both arguments are either INF, NAN, zero or denormalized.
+LSYM(Ldv_s):
+	and	r3, ip, r1, lsr #23
+	teq	r2, ip
+	bne	1f
+	movs	r2, r0, lsl #9
+	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	teq	r3, ip
+	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	mov	r0, r1
+	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+1:	teq	r3, ip
+	bne	2f
+	movs	r3, r1, lsl #9
+	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	mov	r0, r1
+	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+2:	@ If both are non-zero, we need to normalize and resume above.
+	bics	ip, r0, #0x80000000
+	bicnes	ip, r1, #0x80000000
+	bne	LSYM(Ldv_d)
+	@ One or both arguments are zero.
+	bics	r2, r0, #0x80000000
+	bne	LSYM(Lml_i)		@ <anything> / 0 -> INF
+	bics	r3, r1, #0x80000000
+	bne	LSYM(Lml_z)		@ 0 / <anything> -> 0
+#if 0
+	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+#endif
+
+	@ Return a quiet NAN.
+LSYM(Lml_n):
+	orr	r0, r0, #0x7f000000
+	orr	r0, r0, #0x00c00000
+	cfmvsr	mvf0, r0
+	RET
+
+LSYM(Lml_u):
+	@ Overflow?
+	bgt	LSYM(Lml_o)
+
+	@ Check if denormalized result is possible, otherwise return signed 0.
+	cmn	r2, #(24 + 1)
+	bicle	r0, r0, #0x7fffffff
+	cfmvsr	mvf0, r0
+	RETc(le)
+
+	@ Shift value right, round, etc.
+	rsb	r2, r2, #0
+	movs	r1, r0, lsl #1
+	mov	r1, r1, lsr r2
+	rsb	r2, r2, #32
+	mov	ip, r0, lsl r2
+	movs	r0, r1, rrx
+	adc	r0, r0, #0
+	orrs	r3, r3, ip, lsl #1
+	biceq	r0, r0, ip, lsr #31
+	cfmvsr	mvf0, r0
+	RET
+
+	@ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+	eor	r0, r0, r1
+	bic	r0, r0, #0x7fffffff
+	cfmvsr	mvf0, r0
+	RET
+
+	@ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+	eor	r0, r0, r1
+
+	@ Overflow: return INF (sign already in r0).
+LSYM(Lml_o):
+	and	r0, r0, #0x80000000
+	orr	r0, r0, #0x7f000000
+	orr	r0, r0, #0x00800000
+	cfmvsr	mvf0, r0
+	RET
+
+	FUNC_END divsf3
+
+#endif /* L_muldivsf3 */
+
+#ifdef L_unordsf2
+
+ARM_FUNC_START unordsf2
+	mov	ip, #0xff000000
+	and	r2, r1, ip, lsr #1
+	teq	r2, ip, lsr #1
+	bne	1f
+	movs	r2, r1, lsl #9
+	bne	3f			@ r1 is NAN
+1:	and	r2, r0, ip, lsr #1
+	teq	r2, ip, lsr #1
+	bne	2f
+	movs	r2, r0, lsl #9
+	bne	3f			@ r0 is NAN
+2:	mov	r0, #0			@ arguments are ordered.
+	RET
+
+3:	mov	r0, #1			@ arguments are unordered.
+	RET
+
+	FUNC_END unordsf2
+
+#endif /* L_unordsf2 */
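
Note: both divsf3 and divdf3 above use a restoring division over the
aligned mantissas, unrolled four quotient bits per loop iteration.  A
compact C model of the underlying bit-per-step algorithm (a sketch under
simplifying assumptions, not the patch's code):

    /* X and Y are mantissas aligned so that x/y is in [1, 2); produce
       BITS quotient bits, most significant first.  */
    static unsigned int
    mant_quotient (unsigned int x, unsigned int y, int bits)
    {
      unsigned int q = 0;

      while (bits-- > 0)
        {
          q <<= 1;
          if (x >= y)
            {
              x -= y;       /* restoring step: subtract only if it fits */
              q |= 1;
            }
          y >>= 1;
        }
      return q;
    }
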
diff -Nru gcc-3.4.4.ori/gcc/config/arm/lib1funcs.asm gcc-3.4.4.patched/gcc/config/arm/lib1funcs.asm
--- gcc-3.4.4.ori/gcc/config/arm/lib1funcs.asm	2004-01-15 17:56:34.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/lib1funcs.asm	2005-05-30 23:46:26.000000000 +0200
@@ -954,6 +954,11 @@
 
 #endif /* L_interwork_call_via_rX */
 
+#ifdef __MAVERICK__
+#include "ieee754-df-crunch.S"
+#include "ieee754-sf-crunch.S"
+#else /* __MAVERICK__ */
 #include "ieee754-df.S"
 #include "ieee754-sf.S"
+#endif /* __MAVERICK__ */
diff -Nru gcc-3.4.4.ori/gcc/config/arm/linux-elf.h gcc-3.4.4.patched/gcc/config/arm/linux-elf.h
--- gcc-3.4.4.ori/gcc/config/arm/linux-elf.h	2004-01-31 07:18:11.000000000 +0100
+++ gcc-3.4.4.patched/gcc/config/arm/linux-elf.h	2005-05-30 23:46:26.000000000 +0200
@@ -55,7 +55,11 @@
    %{shared:-lc} \
    %{!shared:%{profile:-lc_p}%{!profile:-lc}}"
 
+#if (0)
 #define LIBGCC_SPEC "%{msoft-float:-lfloat} -lgcc"
+#else
+#define LIBGCC_SPEC "-lgcc"
+#endif
 
 /* Provide a STARTFILE_SPEC appropriate for GNU/Linux.  Here we add
    the GNU/Linux magical crtbegin.o file (see crtstuff.c) which
diff -Nru gcc-3.4.4.ori/gcc/config/arm/t-linux gcc-3.4.4.patched/gcc/config/arm/t-linux
--- gcc-3.4.4.ori/gcc/config/arm/t-linux	2003-09-20 23:09:07.000000000 +0200
+++ gcc-3.4.4.patched/gcc/config/arm/t-linux	2005-05-30 23:46:26.000000000 +0200
@@ -1,20 +1,24 @@
 # Just for these, we omit the frame pointer since it makes such a big
 # difference.  It is then pointless adding debugging.
 TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC
+TARGET_LIBGCC2_CFLAGS += -mcpu=ep9312 -mfix-crunch-d1
 LIBGCC2_DEBUG_CFLAGS = -g0
 
 LIB1ASMSRC = arm/lib1funcs.asm
-LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx
-
-# MULTILIB_OPTIONS = mhard-float/msoft-float
-# MULTILIB_DIRNAMES = hard-float soft-float
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx \
+	_negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
+	_truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
+	_fixsfsi _fixunssfsi
 
 # If you want to build both APCS variants as multilib options this is how
 # to do it.
 # MULTILIB_OPTIONS += mapcs-32/mapcs-26
 # MULTILIB_DIRNAMES += apcs-32 apcs-26
 
-# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o
+
+EXTRA_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o
+CRTSTUFF_T_CFLAGS += -mcpu=ep9312 -mfix-crunch-d1
 
 # LIBGCC = stmp-multilib
 # INSTALL_LIBGCC = install-multilib
diff -Nru gcc-3.4.4.ori/gcc/doc/invoke.texi gcc-3.4.4.patched/gcc/doc/invoke.texi
--- gcc-3.4.4.ori/gcc/doc/invoke.texi	2005-04-22 08:49:59.000000000 +0200
+++ gcc-3.4.4.patched/gcc/doc/invoke.texi	2005-05-30 23:46:26.000000000 +0200
@@ -387,7 +387,7 @@
 -msingle-pic-base  -mno-single-pic-base @gol
 -mpic-register=@var{reg} @gol
 -mnop-fun-dllimport @gol
--mcirrus-fix-invalid-insns -mno-cirrus-fix-invalid-insns @gol
+-mfix-crunch-d0 -mfix-crunch-d1 @gol
 -mpoke-function-name @gol
 -mthumb  -marm @gol
 -mtpcs-frame  -mtpcs-leaf-frame @gol
@@ -6719,17 +6719,12 @@
 Specify the register to be used for PIC addressing.
 The default is R10 unless stack-checking is enabled, when R9 is used.
 
-@item -mcirrus-fix-invalid-insns
-@opindex mcirrus-fix-invalid-insns
-@opindex mno-cirrus-fix-invalid-insns
-Insert NOPs into the instruction stream to in order to work around
-problems with invalid Maverick instruction combinations.  This option
-is only valid if the @option{-mcpu=ep9312} option has been used to
-enable generation of instructions for the Cirrus Maverick floating
-point co-processor.  This option is not enabled by default, since the
-problem is only present in older Maverick implementations.  The default
-can be re-enabled by use of the @option{-mno-cirrus-fix-invalid-insns}
-switch.
+@item -mfix-crunch-d0
+@itemx -mfix-crunch-d1
+@opindex mfix-crunch-d0
+@opindex mfix-crunch-d1
+Enable workarounds for the Cirrus MaverickCrunch coprocessor revisions
+D0 and D1, respectively.
 
 @item -mpoke-function-name
 @opindex mpoke-function-name
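
Note: with the renamed options documented above, a typical build for a
revision D1 EP9312 part looks like (hypothetical invocation):

    arm-linux-gcc -O2 -mcpu=ep9312 -mfix-crunch-d1 -c foo.c

The driver spec added to arm.h rejects combining -mfix-crunch-d0 with
-mfix-crunch-d1, and the t-linux fragment above builds libgcc and the
crtstuff objects with -mcpu=ep9312 -mfix-crunch-d1 so that the runtime
support code itself honours the workarounds.
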