This repository has been archived on 2025-02-01. You can view files and clone it, but cannot push or open issues or pull requests.
reprapfirmware-dc42/Isqrt.cpp
David Crocker 6e0f113927 Version 1.04d
Tidied up delta auto-calibration code. We now use least squares for 3,
4, 6 or 7-factor calibration.
When doing delta calibration, if a probe offset was used to adjust the
head position when probing, use the actual head coordinates instead the
probed coordinates.
Bug fix: newline was missing at end of SD card file list sent to USB
when in Marlin emulation mode.
Bug fix: Heater average PWM report (M573) sometimes gave inaccurate
values when the S parameter was used.
2015-04-08 18:03:59 +01:00

175 lines
4.6 KiB
C++

#include "RepRapFirmware.h"
#include "DriveMovement.h"
// The remaining functions are speed-critical, so use full optimisation
#pragma GCC optimize ("O3")
// Fast 62-bit integer square root function (thanks dmould)
uint32_t isqrt64(uint64_t num)
{
uint32_t numHigh = (uint32_t)(num >> 32);
if (numHigh == 0)
{
// 32-bit square root - thanks to Wilco Dijksra for this efficient ARM algorithm
uint32_t num32 = (uint32_t)num;
uint32_t res = 0;
#define iter32(N) \
{ \
uint32_t temp = res | (1 << N); \
if (num32 >= temp << N) \
{ \
num32 -= temp << N; \
res |= 2 << N; \
} \
}
// We need to do 16 iterations
iter32(15); iter32(14); iter32(13); iter32(12);
iter32(11); iter32(10); iter32(9); iter32(8);
iter32(7); iter32(6); iter32(5); iter32(4);
iter32(3); iter32(2); iter32(1); iter32(0);
return res >> 1;
}
else if ((numHigh & (3u << 30)) != 0)
{
// Input out of range - probably negative, so return -1
return 0xFFFFFFFF;
}
else
{
// 62-bit square root
uint32_t res = 0;
#define iter64a(N) \
{ \
res <<= 1; \
uint32_t temp = (res | 1) << (N); \
if (numHigh >= temp) \
{ \
numHigh -= temp; \
res |= 2; \
} \
}
// We need to do 15 iterations (not 16 because we have eliminated the top 2 bits)
iter64a(28) iter64a(26) iter64a(24)
iter64a(22) iter64a(20) iter64a(18) iter64a(16)
iter64a(14) iter64a(12) iter64a(10) iter64a(8)
iter64a(6) iter64a(4) iter64a(2) iter64a(0)
// resHigh is twice the square root of the msw, in the range 0..2^17-1
uint64_t numAll = ((uint64_t)numHigh << 32) | (uint32_t)num;
#define iter64b(N) \
{ \
res <<= 1; \
uint64_t temp = ((uint64_t)res | 1) << (N); \
if (numAll >= temp) \
{ \
numAll -= temp; \
res |= 2; \
} \
}
// We need to do 16 iterations.
// After the last iteration, numAll may be between 0 and (1 + 2 * res) inclusive.
// So to take square roots of numbers up to 64 bits, we need to do all these iterations using 64 bit maths.
// If we restricted the input to e.g. 48 bits, then we could do some of the final iterations using 32-bit maths.
iter64b(30) iter64b(28) iter64b(26) iter64b(24)
iter64b(22) iter64b(20) iter64b(18) iter64b(16)
iter64b(14) iter64b(12) iter64b(10) iter64b(8)
iter64b(6) iter64b(4) iter64b(2) iter64b(0)
return res >> 1;
#undef iter64a
#undef iter64b
}
}
#if 0
// Fast 64-bit integer square root function
uint32_t isqrt2(uint64_t num)
{
uint32_t numHigh = (uint32_t)(num >> 32);
uint32_t resHigh = 0;
#define iter64a(N) \
{ \
uint32_t temp = resHigh + (1 << (N)); \
if (numHigh >= temp << (N)) \
{ \
numHigh -= temp << (N); \
resHigh |= 2 << (N); \
} \
}
// We need to do 16 iterations
iter64a(15); iter64a(14); iter64a(13); iter64a(12);
iter64a(11); iter64a(10); iter64a(9); iter64a(8);
iter64a(7); iter64a(6); iter64a(5); iter64a(4);
iter64a(3); iter64a(2); iter64a(1); iter64a(0);
// resHigh is twice the square root of the msw, in the range 0..2^17-1
uint64_t res = (uint64_t)resHigh << 16;
uint64_t numAll = ((uint64_t)numHigh << 32) | (uint32_t)num;
#define iter64b(N) \
{ \
uint64_t temp = res | (1 << (N)); \
if (numAll >= temp << (N)) \
{ \
numAll -= temp << (N); \
res |= 2 << (N); \
} \
}
// We need to do 16 iterations.
// After the last iteration, numAll may be between 0 and (1 + 2 * res) inclusive.
// So to take square roots of numbers up to 64 bits, we need to do all these iterations using 64 bit maths.
// If we restricted the input to e.g. 48 bits, then we could do some of the final iterations using 32-bit maths.
iter64b(15) iter64b(14) iter64b(13) iter64b(12)
iter64b(11) iter64b(10) iter64b(9) iter64b(8)
iter64b(7) iter64b(6) iter64b(5) iter64b(4)
iter64b(3) iter64b(2) iter64b(1) iter64b(0)
return (uint32_t)(res >> 1);
#undef iter64a
#undef iter64b
#undef iter32
}
uint32_t sqSum1 = 0, sqSum2 = 0, sqCount = 0, sqErrors = 0, lastRes1 = 0, lastRes2 = 0;
uint64_t lastNum = 0;
/* static */ uint32_t isqrt64(uint64_t num)
{
irqflags_t flags = cpu_irq_save();
uint32_t t2 = Platform::GetInterruptClocks();
uint32_t res1 = isqrt1(num);
uint32_t t3 = Platform::GetInterruptClocks();
uint32_t res2 = isqrt2(num);
uint32_t t4 = Platform::GetInterruptClocks();
cpu_irq_restore(flags);
sqSum1 += (t3 - t2);
sqSum2 += (t4 - t3);
++sqCount;
if (res1 != res2)
{
++sqErrors;
lastNum = num;
lastRes1 = res1;
lastRes2 = res2;
}
return res2;
}
#endif
// End