/* $NetBSD: bcopy.S,v 1.8.2.1 2002/11/11 22:21:34 nathanw Exp $ Mach Operating System Copyright (c) 1993 Carnegie Mellon University All Rights Reserved. Permission to use, copy, modify and distribute this software and its documentation is hereby granted,provided that both the copyright notice and this permission notice appear in all copies of the software, derivative works or modified versions, and any portions thereof, and that both notices appear in supporting documentation. CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. Carnegie Mellon requests users of this software to return to * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 ' any improvements or extensions that they make and grant Carnegie Mellon the rights to redistribute these changes. 
*/

/*
 * Adapted for uClibc from NetBSD bcopy.S,v 1.8.2.1 2002/11/11
 *
 * Syntax: GNU as for MIPS, run through the C preprocessor.
 * Register names (a0, t0, v0, AT, ...) come from bcm_mips.h.
 */

#include "bcm_mips.h"

/*
 * Endian-neutral names for the unaligned word accessors.
 * LWHI/SWHI are used with the address of the first (lowest addressed)
 * byte of a possibly unaligned word, LWLO/SWLO with the address of its
 * last byte; the mapping onto lwl/lwr and swl/swr depends on endianness.
 */
#ifdef __BIG_ENDIAN__
#define LWHI	lwl
#define SWHI	swl
#define LWLO	lwr
#define SWLO	swr
#else
#define LWHI	lwr
#define SWHI	swr
#define LWLO	lwl
#define SWLO	swl
#endif

/* global leaf function (does not call other functions) */
#define LEAF(name) \
  .globl name; \
  .ent name; \
name:

/* end of a global function */
#define END(name) \
  .size name,.-name; \
  .end name

#ifdef __ABICALLS__
	.abicalls
#endif

/*
 * bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0	src address
 *	a1	dst address
 *	a2	length
 *
 * When MEMCOPY or MEMMOVE is defined the same code is assembled as
 * memcpy(dst, src, len) or memmove(dst, src, len) instead: the pointer
 * arguments are swapped (a0 = dst, a1 = src) and the original dst is
 * returned in v0.  The bcopy build does not set v0.
 *
 * All three variants choose the copy direction by comparing src with
 * dst, so overlapping buffers are handled in every build.
 *
 * Clobbers: a0-a3, t0-t3, v1, AT (and v0 for memcpy/memmove).
 *
 * The whole routine runs under ".set noreorder": the instruction that
 * follows each branch or jump is its delay slot and executes whether or
 * not the branch is taken.  Do not reorder instructions here.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define SRCREG		a1
#define DSTREG		a0
#else
#define FUNCTION	bcopy
#define SRCREG		a0
#define DSTREG		a1
#endif

#define SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif

	/*
	 *	Make sure we can copy forwards.
	 *
	 *	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst), unsigned
	bne	t0,zero,6f		# src below dst: copy backwards
	andi	t1,DSTREG,3		# (delay slot) low 2 bits of dst
	bne	t1,zero,3f		# dst unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
	bne	t0,zero,5f		# src unaligned, dst aligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: 16 bytes before and 16 bytes after the
	 *	pointers are advanced by 32.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4		# addu, not addi: pointer math must not trap
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	/*
	 *	Backward copy -- mirrors the forward code above.
	 *	Both pointers are first moved one past the end of their
	 *	buffers, so all accesses use negative offsets and the
	 *	alignment checks apply to the end addresses.
	 */
6:	# backcopy
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# low 2 bits of dst end
	bne	t1,zero,3f		# dst unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src end
	bne	t0,zero,5f		# src unaligned, dst aligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
END(FUNCTION)

	.set	noreorder

/*
 * memset(void *s1, int c, int len)
 *
 *	a0	s1, start of the area to fill
 *	a1	c, fill value; only its low 8 bits are used
 *	a2	len, byte count (compared as a signed value)
 *
 * Returns s1 in v0.
 * Clobbers: a0-a2, t0-t2, v1, and AT (through the blt macro).
 *
 * Areas shorter than 12 bytes are filled by the byte loop alone.
 * Longer areas are word aligned with one partial store, filled a word
 * at a time, and finished by the byte loop.
 */
LEAF(memset)
	.set	noreorder
	blt	a2, 12, smallclr	# small amount to clear? (macro: compare via AT, then branch)
	move	v0, a0			# (delay slot) save s1 for result

	/*
	 * Reduce c to an unsigned char before replicating it.  Without
	 * this, any bits above bit 7 (for example from a sign-extended
	 * char) would leak into the word pattern and the word stores
	 * would write different bytes than the byte loop does.
	 */
	andi	a1, a1, 0xff
	sll	t1, a1, 8		# compute  c << 8 in t1
	or	t1, t1, a1		# compute c << 8 | c in t1
	sll	t2, t1, 16		# shift that left 16
	or	t1, t2, t1		# or together: c in all four bytes
	subu	t0, zero, a0		# compute # bytes to word align address
	and	t0, t0, 3
	beq	t0, zero, 1f		# skip if word aligned
	subu	a2, a2, t0		# (delay slot) subtract from remaining count
	SWHI	t1, 0(a0)		# store 1, 2, or 3 bytes to align
	addu	a0, a0, t0
1:
	/*
	 * At least 9 bytes remain here (len >= 12, at most 3 used for
	 * alignment), so there are at least two whole words and the
	 * test-at-the-bottom loop below is safe.
	 */
	and	v1, a2, 3		# v1 = bytes left over after the whole words
	subu	t0, a2, v1		# t0 = bytes covered by whole words
	subu	a2, a2, t0		# a2 = leftover byte count
	addu	t0, t0, a0		# compute ending address
2:
	addu	a0, a0, 4		# clear words
	bne	a0, t0, 2b		#  unrolling loop does not help
	sw	t1, -4(a0)		#  since we are limited by memory speed

smallclr:
	ble	a2, zero, 2f
	addu	t0, a2, a0		# (delay slot) compute ending address
1:
	addu	a0, a0, 1		# clear bytes
	bne	a0, t0, 1b
	sb	a1, -1(a0)		# (delay slot)
2:
	j	ra
	nop
	.set	reorder
END(memset)