
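; NOTE: The following lines appear to be repository-page metadata that was
; captured together with this file; they are not part of the assembly source.
;
; Commit message: Add an R parameter to the M552 command to allow the HTTP port
;   to be configured. When listing SD card files, ignore files with a leading
;   '.' in the filename (e.g. files that Mac computers add to store metadata).
; File info: 128 lines, 4.9 KiB, text.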
;-----------------------------------------------------------------------
; isqrt - integer square root of a 64-bit unsigned value
;
; Syntax: IAR assembler, 16-bit Thumb code (CODE16).
;         The routine relies on the arithmetic and shift instructions below
;         updating the condition flags (adc/sbc chains, bcc/bne tests).
;
; C view: presumably  uint32_t isqrt(uint64_t value);  -- confirm against
;         the declaration used by the callers.
;
; In:     R0 = low 32 bits of the input
;         R1 = high 32 bits of the input
; Out:    R0 = 32-bit result (largest value whose square is <= input)
; Saved:  R2-R7 are pushed on entry and restored on exit
; Stack:  24 bytes (six registers)
;
; Algorithm (binary digit-by-digit square root, one result bit per pass):
;
;     RESULT    = 0
;     REMAINDER = 0
;     repeat once per pair of input bits, most significant pair first:
;         REMAINDER = (REMAINDER << 2) | next two input bits
;         RESULT    = RESULT << 1
;         if ((RESULT << 1) + 1) <= REMAINDER then
;             REMAINDER = REMAINDER - ((RESULT << 1) + 1)
;             RESULT    = RESULT + 1
;     return RESULT
;
; "Shift in the next two input bits" is done by shifting the input word
; left twice and letting the carry ripple into the remainder registers.
;
; The 32 passes needed for a 64-bit input are split into two loops of 16:
;   * Isqrt_Loop_High consumes the high input word (R1). The remainder still
;     fits in 32 bits (R2), so 32-bit arithmetic is enough.
;   * Isqrt_Loop_Low consumes the low input word (R0). R1 is empty by then
;     and is re-used as the upper word of a 64-bit remainder R1:R2.
; If the high input word is zero, Isqrt_Loop_32 is used instead: 16 passes
; over R0 only, with a 32-bit remainder.
;
; The number of passes really needed is half the number of significant
; input bits, but working that out would on average cost more time than
; it saves, so the pass counts are fixed.
;
; Register usage:
;   R0 = low input word; bits are shifted out of it into the remainder
;   R1 = high input word while Isqrt_Loop_High runs,
;        then the upper 32 bits of the remainder in Isqrt_Loop_Low
;   R2 = (lower) 32 bits of the remainder
;   R3 = loop counter
;   R4 = result accumulator
;   R5 = trial remainder, lower 32 bits
;   R6 = trial remainder, upper 32 bits (Isqrt_Loop_Low only)
;   R7 = constant zero, because Thumb SBC has no immediate operand form
;-----------------------------------------------------------------------

ISQRT_PASSES    EQU     16              ; Passes per loop (2 input bits each)

        RSEG    ICODE:CODE(2)
        CODE16

        public  isqrt

isqrt:
        push    {r2,r3,r4,r5,r6,r7}     ; Save every register that is modified
                                        ; (entry/exit registers R0, R1 excepted)

        ; Set up initial variables
        mov     r2,#0                   ; Remainder = 0
        mov     r4,r2                   ; Result = 0
        mov     r7,r2                   ; Zero constant for the SBC borrow ripple
        mov     r3,#ISQRT_PASSES        ; 16 passes per loop

        ; A zero high word needs only the short 32-bit routine
        cmp     r1,r7                   ; High 32 bits clear?
        beq     Isqrt_Loop_32           ; Yes: only the low word matters

;--- First 16 passes: consume the high input word, 32-bit remainder in R2 ---
Isqrt_Loop_High:
        add     r1,r1,r1                ; Shift the top two input bits of R1
        adc     r2,r2                   ; ...into the remainder R2
        add     r1,r1,r1                ; ...
        adc     r2,r2                   ; ...

        lsl     r4,r4,#1                ; Result <<= 1. The bit shifted out is 0,
                                        ; so this leaves the carry flag clear
        mov     r5,r2                   ; Work on a copy (SBC is two-operand)
        sbc     r5,r4                   ; Carry clear => R5 = remainder - result - 1
        bcc     Loopback_High           ; Borrow: trial value too big, bit stays 0
        sub     r5,r5,r4                ; R5 = remainder - ((result << 1) + 1)
        bcc     Loopback_High           ; Borrow: trial value too big, bit stays 0
        mov     r2,r5                   ; Trial value fits: commit new remainder
        add     r4,r4,#1                ; ...and set the new result bit

Loopback_High:
        sub     r3,r3,#1                ; Count the pass
        bne     Isqrt_Loop_High         ; 16 passes in total

        ; R1 is now zero (it has been shifted left 32 times), so it is
        ; re-used as the upper word of a 64-bit remainder R1:R2.
        mov     r3,#ISQRT_PASSES        ; Another 16 passes

;--- Last 16 passes: consume the low input word, 64-bit remainder R1:R2 ---
Isqrt_Loop_Low:
        add     r0,r0,r0                ; Shift the top two input bits of R0
        adc     r2,r2                   ; ...into the remainder low word
        adc     r1,r1                   ; ...which overflows into the high word
        add     r0,r0,r0                ; ...
        adc     r2,r2                   ; ...
        adc     r1,r1                   ; ...

        lsl     r4,r4,#1                ; Result <<= 1. The bit shifted out is 0,
                                        ; so this leaves the carry flag clear
        mov     r5,r2                   ; Work on a copy of the remainder
        mov     r6,r1                   ; ...both words
        sbc     r5,r4                   ; Carry clear => subtract result + 1
        sbc     r6,r7                   ; Ripple the borrow into the high word
        bcc     Loopback_Low            ; Borrow: trial value too big, bit stays 0
        sbc     r5,r4                   ; Carry set here => subtract result only
        sbc     r6,r7                   ; R6:R5 = remainder - ((result << 1) + 1)
        bcc     Loopback_Low            ; Borrow: trial value too big, bit stays 0
        mov     r2,r5                   ; Trial value fits: commit new remainder
        mov     r1,r6                   ; ...
        add     r4,r4,#1                ; ...and set the new result bit

Loopback_Low:
        sub     r3,r3,#1                ; Count the pass
        bne     Isqrt_Loop_Low          ; 16 passes in total

        mov     r0,r4                   ; Return the result in R0
        pop     {r2,r3,r4,r5,r6,r7}     ; Restore saved registers
        bx      r14                     ; Return

;--- Short routine: high input word was zero, 16 passes over R0 only ---
Isqrt_Loop_32:
        add     r0,r0,r0                ; Shift the top two input bits of R0
        adc     r2,r2                   ; ...into the remainder R2
        add     r0,r0,r0                ; ...
        adc     r2,r2                   ; ...

        lsl     r4,r4,#1                ; Result <<= 1. The bit shifted out is 0,
                                        ; so this leaves the carry flag clear
        mov     r5,r2                   ; Work on a copy (SBC is two-operand)
        sbc     r5,r4                   ; Carry clear => R5 = remainder - result - 1
        bcc     Loopback_32             ; Borrow: trial value too big, bit stays 0
        sub     r5,r5,r4                ; R5 = remainder - ((result << 1) + 1)
        bcc     Loopback_32             ; Borrow: trial value too big, bit stays 0
        mov     r2,r5                   ; Trial value fits: commit new remainder
        add     r4,r4,#1                ; ...and set the new result bit

Loopback_32:
        sub     r3,r3,#1                ; Count the pass
        bne     Isqrt_Loop_32           ; 16 passes in total

        mov     r0,r4                   ; Return the result in R0
        pop     {r2,r3,r4,r5,r6,r7}     ; Restore saved registers
        bx      r14                     ; Return

        END