246 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			246 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
/*
 | 
						|
 * Copyright 2010, Google Inc.
 | 
						|
 *
 | 
						|
 * Brought in from coreboot uldivmod.S
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/linkage.h>
 | 
						|
#include <asm/assembler.h>
 | 
						|
 | 
						|
/*
 * 64-bit operands and results live in register pairs, low word first:
 *   A, Q = r0 + (r1 << 32)
 *   B, R = r2 + (r3 << 32)
 * such that A / B = Q with remainder R.
 */
 | 
						|
 | 
						|
@ Register aliases used throughout this file.
@ Each 64-bit value is a pair: _0 is the low word, _1 is the high word.

@ A: dividend, B: divisor (inputs)
A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3

@ C and D: working pairs (C = current quotient bit, D = accumulated result
@ in the long-division loop; reused as mask and shift counts elsewhere)
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

@ Q: quotient, R: remainder (outputs).
@ Q shares registers with A and R shares registers with B, so writing a
@ result word destroys the matching operand word.
Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

@ Extra scratch register, only defined for Thumb builds, where an
@ operand cannot carry a register-specified shift.
THUMB(	TMP	.req	r8	)
 | 
						|
 | 
						|
.pushsection .text.__aeabi_uldivmod, "ax"
/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder
 *
 * In:    r1:r0 = A (dividend), r3:r2 = B (divisor)
 * Out:   r1:r0 = Q (quotient), r3:r2 = R (remainder)
 * Saved: r4-r7 (the C and D pairs), TMP on Thumb builds, and lr are pushed
 *        on entry and popped on every return path; ip is used as scratch.
 *
 * Dispatch:
 *   B == 0             -> L_div_by_0   (calls __div0, then returns Q = R = 0)
 *   B is a power of 2  -> L_pow2       (mask for R, shift for Q)
 *   A_1 == B_1 == 0    -> L_div_32_32  (defers to __aeabi_uidivmod)
 *   otherwise          -> L_div_64_64  (shift-and-subtract long division)
 */
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 stays live: L_pow2 reuses it as the remainder mask
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	@ C = 1: quotient bit that corresponds to the unshifted B
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A (the low word is only counted when the high word is 0)
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32 sets the flags for both shifts below:
	@ MI -> shift < 32, PL -> shift >= 32. ip = 32 - shift.
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
L_lsl_4:
	@ Leave this loop once the top nibble of B is in use (another
	@ shift by 4 would lose bits) or once B >= A
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	@ Same test at single-bit granularity: stop once the top bit of B
	@ is set or once B >= A
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1 (bit 0 of the high word travels through the carry flag
	@ into bit 31 of the low word)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ What is left of A is the remainder; copy it out before the
	@ quotient overwrites r0/r1
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ Both high words are zero. r0 already holds A_0; pass B_0 in r1.
	@ The code below takes the quotient from r0 (Q_0) and the
	@ remainder from r1 once __aeabi_uidivmod returns.
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)	(C still holds B - 1 from the dispatch test)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ D_0 = 31 - clz B_0 = log2(B) when the set bit is in the low word;
	@ D_1 = 32 - D_0 is the left shift that carries A_1 bits into Q_0
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	@ B_0 == 0 (flags are MI): the set bit is in the high word, so
	@ Q_0 = A_1 >> log2(B_1), and D_0 becomes >= 32 so that the
	@ final A_1 shift below yields 0
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ The PL instructions only run for the low-word case; nothing since
	@ the rsbs above has touched the flags
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)	(C still holds B - 1 from the dispatch test)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Count the trailing zeroes in B; B has a single bit set, so that
	@ count is log2(B).
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero (B >= 1 << 32), divide A and B by
	@ 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes in B_0 by binary search.
	@ B_1 is dead from here on and only receives the discarded result
	@ of the flag-setting shift.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	@ Thumb: build the carried-in bits in TMP. Q_1 and A_1 are the same
	@ register, so A_1 must stay intact for the Q_1 shift that follows.
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	TMP, A_1, D_1		)
THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ No meaningful result exists; if __div0 returns, hand back
	@ Q = R = 0
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection
 |