
Improved speed of integer square root function Fixed bug in GCodeBuffer IsEmpty function (thanks zpl) Changed tool numbers to start at T0 in all /sys files Added sample macro files
154 lines
4 KiB
C++
154 lines
4 KiB
C++
#include "RepRapFirmware.h"
|
|
#include "DriveMovement.h"
|
|
|
|
// The remaining functions are speed-critical, so use full optimisation
|
|
#pragma GCC optimize ("O3")
|
|
|
|
// Fast 62-bit integer square root function (thanks dmould)
|
|
uint32_t isqrt64(uint64_t num)
|
|
{
|
|
uint32_t numHigh = (uint32_t)(num >> 32);
|
|
uint32_t res;
|
|
|
|
if ((numHigh & (3 << 30)) != 0)
|
|
{
|
|
// Input out of range - probably negative, so return -1
|
|
res = 0xFFFFFFFF;
|
|
}
|
|
else
|
|
{
|
|
res = 0;
|
|
|
|
#define iter64a(N) \
|
|
{ \
|
|
res <<= 1; \
|
|
uint32_t temp = (res | 1) << (N); \
|
|
if (numHigh >= temp) \
|
|
{ \
|
|
numHigh -= temp; \
|
|
res |= 2; \
|
|
} \
|
|
}
|
|
|
|
// We need to do 15 iterations (not 16 because we have eliminated the top 2 bits)
|
|
iter64a(28) iter64a(26) iter64a(24)
|
|
iter64a(22) iter64a(20) iter64a(18) iter64a(16)
|
|
iter64a(14) iter64a(12) iter64a(10) iter64a(8)
|
|
iter64a(6) iter64a(4) iter64a(2) iter64a(0)
|
|
|
|
// resHigh is twice the square root of the msw, in the range 0..2^17-1
|
|
uint64_t numAll = ((uint64_t)numHigh << 32) | (uint32_t)num;
|
|
|
|
#define iter64b(N) \
|
|
{ \
|
|
res <<= 1; \
|
|
uint64_t temp = ((uint64_t)res | 1) << (N); \
|
|
if (numAll >= temp) \
|
|
{ \
|
|
numAll -= temp; \
|
|
res |= 2; \
|
|
} \
|
|
}
|
|
|
|
// We need to do 16 iterations.
|
|
// After the last iteration, numAll may be between 0 and (1 + 2 * res) inclusive.
|
|
// So to take square roots of numbers up to 64 bits, we need to do all these iterations using 64 bit maths.
|
|
// If we restricted the input to e.g. 48 bits, then we could do some of the final iterations using 32-bit maths.
|
|
iter64b(30) iter64b(28) iter64b(26) iter64b(24)
|
|
iter64b(22) iter64b(20) iter64b(18) iter64b(16)
|
|
iter64b(14) iter64b(12) iter64b(10) iter64b(8)
|
|
iter64b(6) iter64b(4) iter64b(2) iter64b(0)
|
|
|
|
res >>= 1;
|
|
|
|
#undef iter64a
|
|
#undef iter64b
|
|
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
#if 0
|
|
// Fast 64-bit integer square root function
|
|
uint32_t isqrt2(uint64_t num)
|
|
{
|
|
uint32_t numHigh = (uint32_t)(num >> 32);
|
|
uint32_t resHigh = 0;
|
|
|
|
#define iter64a(N) \
|
|
{ \
|
|
uint32_t temp = resHigh + (1 << (N)); \
|
|
if (numHigh >= temp << (N)) \
|
|
{ \
|
|
numHigh -= temp << (N); \
|
|
resHigh |= 2 << (N); \
|
|
} \
|
|
}
|
|
|
|
// We need to do 16 iterations
|
|
iter64a(15); iter64a(14); iter64a(13); iter64a(12);
|
|
iter64a(11); iter64a(10); iter64a(9); iter64a(8);
|
|
iter64a(7); iter64a(6); iter64a(5); iter64a(4);
|
|
iter64a(3); iter64a(2); iter64a(1); iter64a(0);
|
|
|
|
// resHigh is twice the square root of the msw, in the range 0..2^17-1
|
|
uint64_t res = (uint64_t)resHigh << 16;
|
|
uint64_t numAll = ((uint64_t)numHigh << 32) | (uint32_t)num;
|
|
|
|
#define iter64b(N) \
|
|
{ \
|
|
uint64_t temp = res | (1 << (N)); \
|
|
if (numAll >= temp << (N)) \
|
|
{ \
|
|
numAll -= temp << (N); \
|
|
res |= 2 << (N); \
|
|
} \
|
|
}
|
|
|
|
// We need to do 16 iterations.
|
|
// After the last iteration, numAll may be between 0 and (1 + 2 * res) inclusive.
|
|
// So to take square roots of numbers up to 64 bits, we need to do all these iterations using 64 bit maths.
|
|
// If we restricted the input to e.g. 48 bits, then we could do some of the final iterations using 32-bit maths.
|
|
iter64b(15) iter64b(14) iter64b(13) iter64b(12)
|
|
iter64b(11) iter64b(10) iter64b(9) iter64b(8)
|
|
iter64b(7) iter64b(6) iter64b(5) iter64b(4)
|
|
iter64b(3) iter64b(2) iter64b(1) iter64b(0)
|
|
|
|
return (uint32_t)(res >> 1);
|
|
|
|
#undef iter64a
|
|
#undef iter64b
|
|
#undef iter32
|
|
}
|
|
|
|
uint32_t sqSum1 = 0, sqSum2 = 0, sqCount = 0, sqErrors = 0, lastRes1 = 0, lastRes2 = 0;
|
|
uint64_t lastNum = 0;
|
|
|
|
/* static */ uint32_t isqrt64(uint64_t num)
|
|
{
|
|
irqflags_t flags = cpu_irq_save();
|
|
uint32_t t2 = Platform::GetInterruptClocks();
|
|
uint32_t res1 = isqrt1(num);
|
|
uint32_t t3 = Platform::GetInterruptClocks();
|
|
uint32_t res2 = isqrt2(num);
|
|
uint32_t t4 = Platform::GetInterruptClocks();
|
|
cpu_irq_restore(flags);
|
|
|
|
sqSum1 += (t3 - t2);
|
|
sqSum2 += (t4 - t3);
|
|
++sqCount;
|
|
if (res1 != res2)
|
|
{
|
|
++sqErrors;
|
|
lastNum = num;
|
|
lastRes1 = res1;
|
|
lastRes2 = res2;
|
|
}
|
|
|
|
return res2;
|
|
}
|
|
|
|
#endif
|
|
|
|
// End
|