| 1 | /* |
|---|
| 2 | $NetBSD: bcopy.S,v 1.8.2.1 2002/11/11 22:21:34 nathanw Exp $ |
|---|
| 3 | Mach Operating System |
|---|
| 4 | Copyright (c) 1993 Carnegie Mellon University |
|---|
| 5 | All Rights Reserved. |
|---|
| 6 | |
|---|
| 7 | Permission to use, copy, modify and distribute this software and its documentation is hereby granted,provided that both the |
|---|
| 8 | copyright notice and this permission notice appear in all copies of the software, derivative works or modified versions, and any |
|---|
| 9 | portions thereof, and that both notices appear in supporting documentation. |
|---|
| 10 | |
|---|
| 11 | CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON DISCLAIMS |
|---|
| 12 | ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
|---|
| 13 | Carnegie Mellon requests users of this software to return to |
|---|
| 14 | |
|---|
| 15 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
|---|
| 16 | * School of Computer Science |
|---|
| 17 | * Carnegie Mellon University |
|---|
| 18 | * Pittsburgh PA 15213-3890 ' |
|---|
| 19 | |
|---|
| 20 | any improvements or extensions that they make and grant Carnegie Mellon the rights to redistribute these changes. |
|---|
| 21 | */ |
|---|
| 22 | |
|---|
| 23 | /* |
|---|
| 24 | * Adapted for uClibc from NetBSD bcopy.S,v 1.8.2.1 2002/11/11 |
|---|
| 25 | */ |
|---|
| 26 | #include "bcm_mips.h" |
|---|
| 27 | |
|---|
/*
 * Partial-word load/store mnemonics used for unaligned accesses.
 *
 * LWHI/SWHI name the instruction applied at the lowest byte address of
 * an unaligned word, LWLO/SWLO the one applied at the highest byte
 * address.  Which of lwl/lwr (swl/swr) plays which role depends on the
 * byte order of the target.
 *
 * Big-endian is recognised through __BIG_ENDIAN__ as before, and also
 * through the MIPS-specific __MIPSEB__ / __MIPSEB / _MIPSEB macros, so
 * that a big-endian build which does not define __BIG_ENDIAN__ itself
 * does not silently fall through to the little-endian pairing.
 * NOTE(review): bcm_mips.h may already define __BIG_ENDIAN__ for such
 * builds; the extra tests are harmless in that case.
 */
#if defined(__BIG_ENDIAN__) || defined(__MIPSEB__) || \
    defined(__MIPSEB) || defined(_MIPSEB)
#define LWHI	lwl
#define SWHI	swl
#define LWLO	lwr
#define SWLO	swr
#else
#define LWHI	lwr
#define SWHI	swr
#define LWLO	lwl
#define SWLO	swl
#endif
/*
 * LEAF(name): open a global leaf routine (one that calls no other
 * function).  Exports the symbol, starts its .ent record and places
 * the entry label.
 */
#define LEAF(name)	.globl name; .ent name; name:

/*
 * END(name): close a routine opened with LEAF().  Records the symbol
 * size as the distance from the entry label to this point and ends
 * the .ent record.
 */
#define END(name)	.size name, . - name; .end name
| 49 | |
|---|
/* Emit the .abicalls directive only when the build defines __ABICALLS__. */
#if defined(__ABICALLS__)
	.abicalls
#endif
| 53 | |
|---|
/*
 * Entry point selection.  The same body is assembled under one of
 * three names, chosen by the preprocessor:
 *
 *   MEMCOPY defined      memcpy (dst, src, len)   a0 = dst, a1 = src
 *   MEMMOVE defined      memmove(dst, src, len)   a0 = dst, a1 = src
 *   neither defined      bcopy  (src, dst, len)   a0 = src, a1 = dst
 *
 * MEMCOPY takes precedence when both are defined.  The length is
 * always passed in a2.
 */
#if defined(MEMCOPY)
#define FUNCTION	memcpy
#define SRCREG		a1
#define DSTREG		a0
#elif defined(MEMMOVE)
#define FUNCTION	memmove
#define SRCREG		a1
#define DSTREG		a0
#else
#define FUNCTION	bcopy
#define SRCREG		a0
#define DSTREG		a1
#endif

#define SIZEREG		a2
| 76 | |
|---|
/*
 * FUNCTION: copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * Assembled as bcopy, memcpy or memmove depending on the FUNCTION,
 * SRCREG and DSTREG macros.  The memcpy/memmove builds return the
 * original destination pointer in v0.
 *
 * The copy runs forwards when src >= dst and backwards (starting at
 * the end of both buffers) when src < dst, so an overlapping
 * destination never overwrites source bytes that are still to be read.
 *
 * Registers written: v0 (memcpy/memmove only), v1, t0-t3, a3, AT and
 * the three argument registers.
 *
 * The whole body is assembled with .set noreorder: the instruction
 * that follows each branch or jump is its delay slot and always
 * executes, whether or not the branch is taken.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* capture the return value before DSTREG is advanced */
	move	v0,DSTREG
#endif
	/*
	 * Pick the copy direction.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst)
	bne	t0,zero,6f		# src below dst: copy backwards
	/*
	 * The andi below sits in the delay slot of the branch above.  It is
	 * harmless on the backward path, which recomputes t1.
	 *
	 * Forward copy.  Alignment cases, with the frequency measured by
	 * the original authors on a DECstation 5000/200 inside a Mach
	 * kernel:
	 *
	 *	aligned -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 * A further case could be added for source and destination that
	 * are both unaligned but mutually alignable, e.g. both on a
	 * halfword boundary.
	 */
	andi	t1,DSTREG,3		# t1 = low 2 bits of dst
	bne	t1,zero,3f		# dst unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src
	bne	t0,zero,5f		# dst aligned, src unaligned

	/*
	 * Forward aligned->aligned copy, 8*4 bytes at a time.
	 * (The li is the delay slot of the branch above; AT is not live
	 * on the path to 5f.)
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# t0 = count truncated to a multiple of 32
	addu	a3,SRCREG,t0		# a3 = address where the fast loop stops
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after the fast loop

	/*
	 * Unrolled loop: 32 bytes per pass, moved as two groups of four
	 * words.  Each pointer is bumped by 32 between its two groups, so
	 * the second group uses negative offsets.
	 */
1:
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = bytes covered by whole words
	beq	t2,zero,3f		# no whole word left
	addu	t0,SRCREG,t2		# (delay slot) t0 = stop address
	subu	SIZEREG,SIZEREG,t2	# leave only the trailing bytes
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time.  This is also the whole
	 * copy when the destination is not word aligned.
	 */
3:
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	addu	DSTREG,1		# (delay slot)

4:
	j	ra
	nop

	/*
	 * Forward copy from unaligned source to aligned dest: each source
	 * word is assembled with the LWHI/LWLO pair and stored with an
	 * ordinary sw.
	 */
5:
	andi	t0,SIZEREG,3		# t0 = byte count mod 4
	subu	a3,SIZEREG,t0		# a3 = bytes covered by whole words
	beq	a3,zero,3b		# no whole word: byte copy
	nop
	move	SIZEREG,t0		# trailing bytes, copied at 3b afterwards
	addu	a3,SRCREG,a3		# a3 = stop address

	/*
	 * Pointer increments use addu, not addi: addi raises an integer
	 * overflow exception when the signed result overflows, which has
	 * no place in address arithmetic.  The backward twin of this loop
	 * already uses the non-trapping subu.
	 */
1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish with the trailing bytes
	nop

	/*
	 * Backward copy, mirroring the forward code above.  Both pointers
	 * are first moved one past the end of their buffers and then walk
	 * downwards, so every access uses a negative offset or follows a
	 * pointer decrement.
	 */
6:
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# t1 = low 2 bits of dst end
	bne	t1,zero,3f		# dst end unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src end
	bne	t0,zero,5f		# dst end aligned, src end unaligned

	/*
	 * Backward aligned->aligned copy, 8*4 bytes at a time.
	 * (The li is the delay slot of the branch above.)
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# t0 = count truncated to a multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after the fast loop
	subu	a3,SRCREG,t0		# a3 = address where the fast loop stops

	/*
	 * Unrolled loop: 32 bytes per pass, as in the forward direction.
	 */
1:
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = bytes covered by whole words
	beq	t2,zero,3f		# no whole word left
	subu	t0,SRCREG,t2		# (delay slot) t0 = stop address
	subu	SIZEREG,SIZEREG,t2	# leave only the trailing bytes
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

	/*
	 * Copy the remaining bytes one at a time, moving downwards.
	 */
3:
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	subu	DSTREG,1		# (delay slot)

4:
	j	ra
	nop

	/*
	 * Backward copy from unaligned source to aligned dest.
	 */
5:
	andi	t0,SIZEREG,3		# t0 = byte count mod 4
	subu	a3,SIZEREG,t0		# a3 = bytes covered by whole words
	beq	a3,zero,3b		# no whole word: byte copy
	nop
	move	SIZEREG,t0		# trailing bytes, copied at 3b afterwards
	subu	a3,SRCREG,a3		# a3 = stop address

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish with the trailing bytes
	nop

	.set	reorder
	.set	at
END(FUNCTION)
| 294 | |
|---|
| 295 | |
|---|
| 296 | .set noreorder |
|---|
| 297 | |
|---|
| 298 | |
|---|
/*
 * memset(void *s1, int c, int len)
 *
 *	a0	s1	start of the area to fill
 *	a1	c	fill value; only its low 8 bits are used
 *	a2	len	number of bytes to fill (tested as a signed value)
 *
 * Returns s1 in v0.  Writes v1, t0-t2 and the argument registers.
 *
 * Fills of 12 bytes or more align the pointer with one partial-word
 * store, then store whole words, then finish byte by byte.  Shorter
 * fills use the byte loop only.
 *
 * Assembled with .set noreorder: the instruction after each branch is
 * its delay slot and always executes.
 */
LEAF(memset)
	.set	noreorder
	blt	a2, 12, smallclr	# fewer than 12 bytes: byte loop only
	move	v0, a0			# (delay slot) return value is s1

	/*
	 * Build a word holding the fill byte in all four byte lanes.
	 * c arrives as an int and may carry bits above bit 7 (a sign
	 * extended char, for instance).  Those bits must be dropped
	 * first, otherwise they leak into the neighbouring lanes and the
	 * word stores below write a pattern that differs from what the
	 * byte loop (sb) writes.
	 */
	andi	a1, a1, 0xff		# keep only the low byte of c
	sll	t1, a1, 8		# t1 = c << 8
	or	t1, t1, a1		# t1 = c << 8 | c
	sll	t2, t1, 16		# t2 = that, shifted into the upper half
	or	t1, t2, t1		# t1 = c replicated into every byte

	subu	t0, zero, a0		# t0 = -s1
	and	t0, t0, 3		# t0 = bytes needed to reach a word boundary
	beq	t0, zero, 1f		# skip if already word aligned
	subu	a2, a2, t0		# (delay slot) take them off the count
	SWHI	t1, 0(a0)		# store 1, 2, or 3 bytes to align
	addu	a0, a0, t0
1:
	and	v1, a2, 3		# v1 = bytes left over after whole words
	subu	t0, a2, v1		# t0 = bytes covered by whole words
	subu	a2, a2, t0		# a2 = trailing byte count
	addu	t0, t0, a0		# t0 = address where the word loop stops

	/*
	 * Word loop.  Original authors' note: unrolling does not help
	 * here since the loop is limited by memory speed.
	 */
2:
	addu	a0, a0, 4
	bne	a0, t0, 2b
	sw	t1, -4(a0)		# (delay slot) fill the word just stepped over

smallclr:
	ble	a2, zero, 2f		# nothing (left) to fill
	addu	t0, a2, a0		# (delay slot) t0 = end address
1:
	addu	a0, a0, 1
	bne	a0, t0, 1b
	sb	a1, -1(a0)		# (delay slot) fill the byte just stepped over
2:
	j	ra
	nop
	.set	reorder
END(memset)