diff --git a/src/rp2040/pico_platform/include/pico/platform/cpu_regs.h b/src/rp2040/pico_platform/include/pico/platform/cpu_regs.h index 21ff95af..a51ae84a 100644 --- a/src/rp2040/pico_platform/include/pico/platform/cpu_regs.h +++ b/src/rp2040/pico_platform/include/pico/platform/cpu_regs.h @@ -23,6 +23,7 @@ #define arm_cpu_hw m0plus_hw #include "hardware/structs/nvic.h" #include "hardware/structs/scb.h" +#include "hardware/structs/systick.h" #endif #endif \ No newline at end of file diff --git a/src/rp2350/pico_platform/include/pico/platform/cpu_regs.h b/src/rp2350/pico_platform/include/pico/platform/cpu_regs.h index b79127c7..87125b56 100644 --- a/src/rp2350/pico_platform/include/pico/platform/cpu_regs.h +++ b/src/rp2350/pico_platform/include/pico/platform/cpu_regs.h @@ -25,6 +25,7 @@ #define arm_cpu_hw m33_hw #include "hardware/structs/nvic.h" #include "hardware/structs/scb.h" +#include "hardware/structs/systick.h" #endif #endif #endif \ No newline at end of file diff --git a/src/rp2_common/pico_double/CMakeLists.txt b/src/rp2_common/pico_double/CMakeLists.txt index c038db86..2d5ac90e 100644 --- a/src/rp2_common/pico_double/CMakeLists.txt +++ b/src/rp2_common/pico_double/CMakeLists.txt @@ -23,68 +23,84 @@ if (NOT TARGET pico_double) $>,$,${PICO_DEFAULT_DOUBLE_IMPL}>) function(wrap_double_functions TARGET) - pico_wrap_function(${TARGET} __aeabi_dadd) - pico_wrap_function(${TARGET} __aeabi_ddiv) - pico_wrap_function(${TARGET} __aeabi_dmul) - pico_wrap_function(${TARGET} __aeabi_drsub) - pico_wrap_function(${TARGET} __aeabi_dsub) - pico_wrap_function(${TARGET} __aeabi_cdcmpeq) - pico_wrap_function(${TARGET} __aeabi_cdrcmple) - pico_wrap_function(${TARGET} __aeabi_cdcmple) - pico_wrap_function(${TARGET} __aeabi_dcmpeq) - pico_wrap_function(${TARGET} __aeabi_dcmplt) - pico_wrap_function(${TARGET} __aeabi_dcmple) - pico_wrap_function(${TARGET} __aeabi_dcmpge) - pico_wrap_function(${TARGET} __aeabi_dcmpgt) - pico_wrap_function(${TARGET} __aeabi_dcmpun) - 
pico_wrap_function(${TARGET} __aeabi_i2d) - pico_wrap_function(${TARGET} __aeabi_l2d) - pico_wrap_function(${TARGET} __aeabi_ui2d) - pico_wrap_function(${TARGET} __aeabi_ul2d) - pico_wrap_function(${TARGET} __aeabi_d2iz) - pico_wrap_function(${TARGET} __aeabi_d2lz) - pico_wrap_function(${TARGET} __aeabi_d2uiz) - pico_wrap_function(${TARGET} __aeabi_d2ulz) - pico_wrap_function(${TARGET} __aeabi_d2f) - pico_wrap_function(${TARGET} sqrt) - pico_wrap_function(${TARGET} cos) - pico_wrap_function(${TARGET} sin) - pico_wrap_function(${TARGET} tan) - pico_wrap_function(${TARGET} atan2) - pico_wrap_function(${TARGET} exp) - pico_wrap_function(${TARGET} log) - - pico_wrap_function(${TARGET} ldexp) - pico_wrap_function(${TARGET} copysign) - pico_wrap_function(${TARGET} trunc) - pico_wrap_function(${TARGET} floor) - pico_wrap_function(${TARGET} ceil) - pico_wrap_function(${TARGET} round) - pico_wrap_function(${TARGET} sincos) # gnu - pico_wrap_function(${TARGET} asin) - pico_wrap_function(${TARGET} acos) - pico_wrap_function(${TARGET} atan) - pico_wrap_function(${TARGET} sinh) - pico_wrap_function(${TARGET} cosh) - pico_wrap_function(${TARGET} tanh) - pico_wrap_function(${TARGET} asinh) - pico_wrap_function(${TARGET} acosh) - pico_wrap_function(${TARGET} atanh) - pico_wrap_function(${TARGET} exp2) - pico_wrap_function(${TARGET} log2) - pico_wrap_function(${TARGET} exp10) - pico_wrap_function(${TARGET} log10) - pico_wrap_function(${TARGET} pow) - pico_wrap_function(${TARGET} powint) #gnu - pico_wrap_function(${TARGET} hypot) - pico_wrap_function(${TARGET} cbrt) - pico_wrap_function(${TARGET} fmod) - pico_wrap_function(${TARGET} drem) - pico_wrap_function(${TARGET} remainder) - pico_wrap_function(${TARGET} remquo) - pico_wrap_function(${TARGET} expm1) - pico_wrap_function(${TARGET} log1p) - pico_wrap_function(${TARGET} fma) + cmake_parse_arguments(WRAP_DOUBLE "NO_AEABI_ARITHMETIC;NO_AEABI_CMP;NO_AEABI_CONV_32;NO_AEABI_CONV_64;NO_AEABI_CONV_FLOAT;NO_SQRT;NO_SCI;NO_SCI_EXTRA" "" 
"" ${ARGN} ) + if (NOT WRAP_DOUBLE_NO_AEABI_ARITHMETIC) + pico_wrap_function(${TARGET} __aeabi_dadd) + pico_wrap_function(${TARGET} __aeabi_ddiv) + pico_wrap_function(${TARGET} __aeabi_dmul) + pico_wrap_function(${TARGET} __aeabi_drsub) + pico_wrap_function(${TARGET} __aeabi_dsub) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CMP) + pico_wrap_function(${TARGET} __aeabi_cdcmpeq) + pico_wrap_function(${TARGET} __aeabi_cdrcmple) + pico_wrap_function(${TARGET} __aeabi_cdcmple) + pico_wrap_function(${TARGET} __aeabi_dcmpeq) + pico_wrap_function(${TARGET} __aeabi_dcmplt) + pico_wrap_function(${TARGET} __aeabi_dcmple) + pico_wrap_function(${TARGET} __aeabi_dcmpge) + pico_wrap_function(${TARGET} __aeabi_dcmpgt) + pico_wrap_function(${TARGET} __aeabi_dcmpun) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_32) + pico_wrap_function(${TARGET} __aeabi_i2d) + pico_wrap_function(${TARGET} __aeabi_ui2d) + pico_wrap_function(${TARGET} __aeabi_d2iz) + pico_wrap_function(${TARGET} __aeabi_d2uiz) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_64) + pico_wrap_function(${TARGET} __aeabi_l2d) + pico_wrap_function(${TARGET} __aeabi_ul2d) + pico_wrap_function(${TARGET} __aeabi_d2lz) + pico_wrap_function(${TARGET} __aeabi_d2ulz) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_FLOAT) + pico_wrap_function(${TARGET} __aeabi_d2f) + endif() + if (NOT WRAP_DOUBLE_NO_SQRT) + pico_wrap_function(${TARGET} sqrt) + endif() + if (NOT WRAP_DOUBLE_NO_SCI) + pico_wrap_function(${TARGET} cos) + pico_wrap_function(${TARGET} sin) + pico_wrap_function(${TARGET} tan) + pico_wrap_function(${TARGET} atan2) + pico_wrap_function(${TARGET} exp) + pico_wrap_function(${TARGET} log) + endif() + if (NOT WRAP_DOUBLE_NO_SCI_EXTRA) + pico_wrap_function(${TARGET} ldexp) + pico_wrap_function(${TARGET} copysign) + pico_wrap_function(${TARGET} trunc) + pico_wrap_function(${TARGET} floor) + pico_wrap_function(${TARGET} ceil) + pico_wrap_function(${TARGET} round) + pico_wrap_function(${TARGET} sincos) # gnu + pico_wrap_function(${TARGET} 
asin) + pico_wrap_function(${TARGET} acos) + pico_wrap_function(${TARGET} atan) + pico_wrap_function(${TARGET} sinh) + pico_wrap_function(${TARGET} cosh) + pico_wrap_function(${TARGET} tanh) + pico_wrap_function(${TARGET} asinh) + pico_wrap_function(${TARGET} acosh) + pico_wrap_function(${TARGET} atanh) + pico_wrap_function(${TARGET} exp2) + pico_wrap_function(${TARGET} log2) + pico_wrap_function(${TARGET} exp10) + pico_wrap_function(${TARGET} log10) + pico_wrap_function(${TARGET} pow) + pico_wrap_function(${TARGET} powint) #gnu + pico_wrap_function(${TARGET} hypot) + pico_wrap_function(${TARGET} cbrt) + pico_wrap_function(${TARGET} fmod) + pico_wrap_function(${TARGET} drem) + pico_wrap_function(${TARGET} remainder) + pico_wrap_function(${TARGET} remquo) + pico_wrap_function(${TARGET} expm1) + pico_wrap_function(${TARGET} log1p) + pico_wrap_function(${TARGET} fma) + endif() endfunction() pico_add_library(pico_double_pico) @@ -96,8 +112,8 @@ if (NOT TARGET pico_double) ${CMAKE_CURRENT_LIST_DIR}/double_v1_rom_shim_rp2040.S ) target_link_libraries(pico_double_pico INTERFACE pico_bootrom pico_double_headers hardware_divider) - wrap_double_functions(pico_double_pico) - elseif(NOT PICO_RISCV) + wrap_double_functions(pico_double_pico) # wrap everything + elseif(PICO_RP2350 AND NOT PICO_RISCV) pico_add_library(pico_double_pico_dcp) target_sources(pico_double_pico_dcp INTERFACE ${CMAKE_CURRENT_LIST_DIR}/double_math.c @@ -107,11 +123,10 @@ if (NOT TARGET pico_double) ${CMAKE_CURRENT_LIST_DIR}/double_conv_m33.S ) target_link_libraries(pico_double_pico_dcp INTERFACE pico_double_headers) - wrap_double_functions(pico_double_pico_dcp) + wrap_double_functions(pico_double_pico_dcp) #wrap everything target_link_libraries(pico_double_pico INTERFACE pico_double_pico_dcp) endif() - pico_add_library(pico_double_none) target_sources(pico_double_none INTERFACE ${CMAKE_CURRENT_LIST_DIR}/double_none.S diff --git a/src/rp2_common/pico_double/include/pico/double.h 
b/src/rp2_common/pico_double/include/pico/double.h index 188c34f7..9afce8bb 100644 --- a/src/rp2_common/pico_double/include/pico/double.h +++ b/src/rp2_common/pico_double/include/pico/double.h @@ -84,7 +84,10 @@ extern "C" { * * - GNU extensions: * -* powint, sincos +* sincos +* +* Additional functions on Arm: +* powint * * On Arm, the following additional optimized functions are also provided when using `pico_double_pico`, all of which * saturate to the nearest representable value for too large input when converting from floating point types: @@ -129,9 +132,33 @@ extern "C" { * On RISC-V there is no custom double-precision floating point support, so `pico_double_pico` is equivalent to `pico_double_compiler` * \endif */ + +// === we always define these +#define PICO_DOUBLE_HAS_INT32_TO_DOUBLE_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_INT64_TO_DOUBLE_CONVERSIONS 1 +// rounding towards zero +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT32_Z_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT64_Z_CONVERSIONS 1 +// === + +// PICO_CONFIG: PICO_DOUBLE_IN_RAM, Force placement of SDK provided double-precision floating point into RAM, type=bool, default=0, group=pico_float #if !defined(__riscv) || PICO_COMBINED_DOCS #if PICO_COMBINED_DOCS || !LIB_PICO_DOUBLE_COMPILER +#define PICO_DOUBLE_HAS_FIX32_TO_DOUBLE_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_FIX64_TO_DOUBLE_CONVERSIONS 1 +// rounding towards zero +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX32_Z_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX64_Z_CONVERSIONS 1 + +// rounding towards negative infinity +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT32_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT64_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX32_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX64_M_CONVERSIONS 1 + +#define PICO_DOUBLE_HAS_POWINT 1 + double int2double(int32_t i); double uint2double(uint32_t i); double int642double(int64_t i); @@ -143,8 +170,8 @@ double ufix642double(uint64_t m, int e); // These methods 
round towards 0, which IS the C way int32_t double2int_z(double f); -int64_t double2int64_z(double f); int32_t double2uint_z(double f); +int64_t double2int64_z(double f); int64_t double2uint64_z(double f); int32_t double2fix_z(double f, int e); uint32_t double2ufix_z(double f, int e); @@ -162,18 +189,31 @@ uint32_t double2ufix(double f, int e); int64_t double2fix64(double f, int e); uint64_t double2ufix64(double f, int e); +double powint(double x, int y); #endif double exp10(double x); +#if PICO_C_COMPILER_IS_CLANG && !LIB_PICO_DOUBLE_COMPILER +// clang unhelpfully splits sincos into explicit calls to sin & cos +extern void WRAPPER_FUNC(sincos)(double x, double *sinx, double *cosx); +#define sincos(x, sinx, cosx) WRAPPER_FUNC(sincos)(x, sinx, cosx) +#else void sincos(double x, double *sinx, double *cosx); -double powint(double x, int y); +#endif + #if PICO_RP2350 || PICO_COMBINED_DOCS + +#if LIB_PICO_DOUBLE_PICO_DCP +#define PICO_DOUBLE_HAS_DDIV_FAST 1 +#define PICO_DOUBLE_HAS_SQRT_FAST 1 +#define PICO_DOUBLE_HAS_FMA_FAST 1 double ddiv_fast(double n, double d); double sqrt_fast(double f); double fma_fast(double x, double y, double z); // this is not fused double mla(double x, double y, double z); // another name for fma_fast #endif +#endif #endif @@ -188,6 +228,14 @@ static inline int32_t double2int_z(double d) { return (int32_t)d; } static inline int64_t double2int64_z(double d) { return (int64_t)d; } static inline int32_t double2uint_z(double d) { return (uint32_t)d; } static inline int64_t double2uint64_z(double d) { return (uint64_t)d; } + +#if __has_builtin(__builtin_powi) +#define PICO_DOUBLE_HAS_POWINT 1 +static __force_inline double powint(double d, int32_t p) { + return __builtin_powi(d, p); +} +#endif + #endif #ifdef __cplusplus diff --git a/src/rp2_common/pico_float/CMakeLists.txt b/src/rp2_common/pico_float/CMakeLists.txt index 28eba6eb..a2474d22 100644 --- a/src/rp2_common/pico_float/CMakeLists.txt +++ b/src/rp2_common/pico_float/CMakeLists.txt @@
-174,6 +174,8 @@ #NO_SCI_EXTRA # todo - are our versions better than what GCC proides? NO_FMAF # direct VFP instruction support ) + # this allows inlining of sqrtf for example - if you really want errno support, use pico_float_compiler + target_compile_options(pico_float_pico_vfp INTERFACE -fno-math-errno) target_link_libraries(pico_float_pico INTERFACE pico_float_pico_vfp) diff --git a/src/rp2_common/pico_float/include/pico/float.h b/src/rp2_common/pico_float/include/pico/float.h index 1f5e3a64..1a01df02 100644 --- a/src/rp2_common/pico_float/include/pico/float.h +++ b/src/rp2_common/pico_float/include/pico/float.h @@ -93,7 +93,11 @@ extern "C" { * * - GNU extensions: * -* powintf, sincosf +* sincosf +* +* Additional functions on Arm: +* +* powintf * * On Arm, the following additional optimized functions are also provided (when using `_pico` variants of `pico_float`), all of which * saturate to the nearest representable value for too large input when converting from floating point types: @@ -150,10 +154,32 @@ extern "C" { * \endif */ +// === we always define these +#define PICO_FLOAT_HAS_INT32_TO_FLOAT_CONVERSIONS 1 +#define PICO_FLOAT_HAS_INT64_TO_FLOAT_CONVERSIONS 1 +// rounding towards zero +#define PICO_FLOAT_HAS_FLOAT_TO_INT32_Z_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_INT64_Z_CONVERSIONS 1 +// === + // PICO_CONFIG: PICO_FLOAT_IN_RAM, Force placement of SDK provided single-precision floating point into RAM, type=bool, default=0, group=pico_float #if !defined(__riscv) || PICO_COMBINED_DOCS #if PICO_COMBINED_DOCS || !LIB_PICO_FLOAT_COMPILER +#define PICO_FLOAT_HAS_FIX32_TO_FLOAT_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FIX64_TO_FLOAT_CONVERSIONS 1 +// rounding towards zero +#define PICO_FLOAT_HAS_FLOAT_TO_FIX32_Z_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_FIX64_Z_CONVERSIONS 1 + +// rounding towards negative infinity +#define PICO_FLOAT_HAS_FLOAT_TO_INT32_M_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_INT64_M_CONVERSIONS 1 +#define 
PICO_FLOAT_HAS_FLOAT_TO_FIX32_M_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_FIX64_M_CONVERSIONS 1 + +#define PICO_FLOAT_HAS_POWINTF 1 + #if LIB_PICO_FLOAT_PICO_VFP // note these functions do still exist for assembler use, we would just prefer to let the compiler handle it for C/C++ to avoid a call static inline float int2float(int32_t i) { return (float)i; } @@ -164,6 +190,7 @@ float uint2float(uint32_t i); #endif float int642float(int64_t i); float uint642float(uint64_t i); + float fix2float(int32_t m, int e); float ufix2float(uint32_t m, int e); float fix642float(int64_t m, int e); @@ -196,6 +223,8 @@ uint32_t float2ufix(float f, int e); int64_t float2fix64(float f, int e); uint64_t float2ufix64(float f, int e); +float powintf(float x, int y); + #if LIB_PICO_FLOAT_PICO_VFP // a bit of a hack to inline VFP fixed point conversion when exponent is constant and in range 1-32 #define fix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _fix2float_inline(m, e) : fix2 ## float(m, e), fix2 ## float(m, e)) @@ -293,10 +322,17 @@ uint64_t float2ufix64(float f, int e); #endif float exp10f(float x); +#if PICO_C_COMPILER_IS_CLANG && !LIB_PICO_FLOAT_COMPILER +// clang unhelpfully splits sincosf into explicit calls to sin & cos +extern void WRAPPER_FUNC(sincosf)(float x, float *sinx, float *cosx); +#define sincosf(x, sinx, cosx) WRAPPER_FUNC(sincosf)(x, sinx, cosx) +#else void sincosf(float x, float *sinx, float *cosx); -float powintf(float x, int y); +#endif -#if PICO_RP2350 || PICO_COMBINED_DOCS +#if (PICO_RP2350 && LIB_PICO_FLOAT_PICO_DCP) || PICO_COMBINED_DOCS +#define PICO_FLOAT_HAS_FDIV_FAST 1 +#define PICO_FLOAT_HAS_SQRTF_FAST 1 float fdiv_fast(float n, float d); float sqrtf_fast(float f); #endif @@ -315,6 +351,13 @@ static inline int32_t float2int_z(float f) { return (int32_t)f; } static inline int64_t float2int64_z(float f) { return (int64_t)f; } static inline int32_t float2uint_z(float f) { return (uint32_t)f; } static inline int64_t
float2uint64_z(float f) { return (uint64_t)f; } + +#if __has_builtin(__builtin_powif) +#define PICO_FLOAT_HAS_POWINTF 1 +static __force_inline float powintf(float f, int32_t p) { + return __builtin_powif(f, p); +} +#endif #endif #ifdef __cplusplus diff --git a/test/pico_float_test/CMakeLists.txt b/test/pico_float_test/CMakeLists.txt index 032497f5..a80dfcc6 100644 --- a/test/pico_float_test/CMakeLists.txt +++ b/test/pico_float_test/CMakeLists.txt @@ -70,6 +70,16 @@ foreach (FLOAT_TYPE IN LISTS FLOAT_TYPES) target_compile_options(${PICO_FLOAT_TEST} PRIVATE -fno-strict-float-cast-overflow) target_compile_options(custom_float_funcs_test_${FLOAT_TYPE} PRIVATE -fno-strict-float-cast-overflow) endif() + + if (NOT PICO_RISCV) # todo need risc-v support too + add_executable(float_benchmark_${FLOAT_TYPE} float_benchmark.c) + pico_set_float_implementation(float_benchmark_${FLOAT_TYPE} ${FLOAT_TYPE}) + target_link_libraries(float_benchmark_${FLOAT_TYPE} PRIVATE pico_stdlib m) + pico_add_extra_outputs(float_benchmark_${FLOAT_TYPE}) + target_compile_definitions(float_benchmark_${FLOAT_TYPE} PRIVATE PICO_FLOAT_IN_RAM=1) + pico_set_printf_implementation(float_benchmark_${FLOAT_TYPE} compiler) + pico_set_binary_type(float_benchmark_${FLOAT_TYPE} copy_to_ram) + endif() endforeach () foreach (DOUBLE_TYPE IN LISTS DOUBLE_TYPES) @@ -116,6 +126,16 @@ foreach (DOUBLE_TYPE IN LISTS DOUBLE_TYPES) target_compile_options(custom_double_funcs_test_${DOUBLE_TYPE} PRIVATE -fno-strict-float-cast-overflow) endif() endif() + + if (NOT PICO_RISCV) # todo need risc-v support too + add_executable(double_benchmark_${DOUBLE_TYPE} double_benchmark.c) + pico_set_double_implementation(double_benchmark_${DOUBLE_TYPE} ${DOUBLE_TYPE}) + target_link_libraries(double_benchmark_${DOUBLE_TYPE} PRIVATE pico_stdlib m) + pico_add_extra_outputs(double_benchmark_${DOUBLE_TYPE}) + target_compile_definitions(double_benchmark_${DOUBLE_TYPE} PRIVATE PICO_DOUBLE_IN_RAM=1) + 
pico_set_printf_implementation(double_benchmark_${DOUBLE_TYPE} compiler) + pico_set_binary_type(double_benchmark_${DOUBLE_TYPE} copy_to_ram) + endif() endforeach () if (PICO_RP2350 AND NOT PICO_RISCV) diff --git a/test/pico_float_test/custom_double_funcs_test.c b/test/pico_float_test/custom_double_funcs_test.c index 928d87a0..db641e14 100644 --- a/test/pico_float_test/custom_double_funcs_test.c +++ b/test/pico_float_test/custom_double_funcs_test.c @@ -473,8 +473,8 @@ int test() { test_checki(double2int(-2147483648.1), INT32_MIN, "double2int17"); test_checki(double2int(-21474836480.1), INT32_MIN, "double2int18"); test_checki(double2int(make_positive_denormal_double()), 0, "double2int19"); - double double2int20 = double2int(make_negative_denormal_double()); - if (double2int20 == -1.0) double2int20 = 0; // -1 is a valid answer depending on flush to zero + int double2int20 = double2int(make_negative_denormal_double()); + if (double2int20 == -1) double2int20 = 0; // -1 is a valid answer depending on flush to zero test_checki(double2int20, 0, "double2int20"); printf("double2uint\n"); diff --git a/test/pico_float_test/double_benchmark.c b/test/pico_float_test/double_benchmark.c new file mode 100644 index 00000000..8ba3f1c8 --- /dev/null +++ b/test/pico_float_test/double_benchmark.c @@ -0,0 +1,1344 @@ +#include +#include +#include "pico/stdlib.h" +#include "pico/double.h" +#include "pico/platform/cpu_regs.h" + +#if defined(LLVM_LIBC_COMMON_H) && !defined(__LLVM_LIBC__) +#define __LLVM_LIBC__ 1 +#endif + +static void init_systick() { + systick_hw->csr = 0; + systick_hw->rvr = ARM_CPU_PREFIXED(SYST_RVR_RELOAD_BITS); + systick_hw->csr = ARM_CPU_PREFIXED(SYST_CSR_CLKSOURCE_BITS) | ARM_CPU_PREFIXED(SYST_CSR_ENABLE_BITS); +} + +// Stop the compiler from constant-folding a hardware base pointer into the +// pointers to individual registers, in cases where constant folding has +// produced redundant 32-bit pointer literals that could have been load/store +// offsets. 
(Note typeof(ptr+0) gives non-const, for +r constraint.) E.g. +// uart_hw_t *uart0 = __get_opaque_ptr(uart0_hw); +#define __get_opaque_ptr(ptr) ({ \ + typeof((ptr)+0) __opaque_ptr = (ptr); \ + asm ("" : "+r"(__opaque_ptr)); \ + __opaque_ptr; \ +}) + +static __force_inline uint32_t systick_value() { + return systick_hw->cvr; +} + +static __force_inline io_ro_32 *systick_value_ptr() { + return __get_opaque_ptr(&systick_hw->cvr); +} + +static int cycle_diff(uint32_t systick1, uint32_t systick2) { + static_assert(ARM_CPU_PREFIXED(SYST_CVR_CURRENT_LSB) == 0, ""); + uint32_t shift = 32 - ARM_CPU_PREFIXED(SYST_CVR_CURRENT_MSB); + return (((int32_t)((systick1 << shift) - (systick2 << shift))) >> shift) - 1; // -1 since the second systick read costs one +} + +#define timer_func_def(name) static __noinline int __not_in_flash_func(time_##name) + +static double d_a[] = {1.3, -200.3, 1.6e15, 1e-2}; +static double d_b[] = {-121.3, 50.3, 27.9, 1.7e23}; +static double d_c[] = {20.3, -50.3, -3.9e-3, -4.1e7}; +static double d_m1to1[] = {-0.5, .9999, 0.1, -0.999999}; + +static int32_t i_pow[] = {3,6,27,-10}; +static double d_positive[] = {0.0, 3.7, 1245325., 1e27}; +static double d_1plus[] = {1.0, 3.7, 1245325., 1e27}; + +static double d_smaller[] = {-1000.3, 200.3, 1.6e15}; +static double d_bigger[] = {-121.3, 5000.3, 1.6e16}; + +static int32_t i_32[] = { 0, 3, -200, INT32_MIN, INT32_MAX }; +static int64_t i_64[] = { 0, 3, -200, 0x123456789abcll, -0x123456789abcll, INT64_MIN, INT64_MAX }; + +// bits for fixed point conversions +static int32_t n_32[] = { 0, 3, -3, 16, -16 }; + +static_assert(count_of(d_a) == count_of(d_b), ""); +static_assert(count_of(d_a) == count_of(d_c), ""); +static_assert(count_of(d_a) == count_of(i_pow), ""); +static_assert(count_of(d_a) == count_of(d_positive), ""); + +static_assert(count_of(d_smaller) == count_of(d_bigger), ""); + +static double time_unary_func(int (*timer)(double), double *d, uint count) { + double total = 0.f; + for (uint i=0;i +#include 
+#include "pico/stdlib.h" +#include "pico/float.h" +#include "pico/platform/cpu_regs.h" + +#if defined(LLVM_LIBC_COMMON_H) && !defined(__LLVM_LIBC__) +#define __LLVM_LIBC__ 1 +#endif + +static void init_systick() { + systick_hw->csr = 0; + systick_hw->rvr = ARM_CPU_PREFIXED(SYST_RVR_RELOAD_BITS); + systick_hw->csr = ARM_CPU_PREFIXED(SYST_CSR_CLKSOURCE_BITS) | ARM_CPU_PREFIXED(SYST_CSR_ENABLE_BITS); +} + +// Stop the compiler from constant-folding a hardware base pointer into the +// pointers to individual registers, in cases where constant folding has +// produced redundant 32-bit pointer literals that could have been load/store +// offsets. (Note typeof(ptr+0) gives non-const, for +r constraint.) E.g. +// uart_hw_t *uart0 = __get_opaque_ptr(uart0_hw); +#define __get_opaque_ptr(ptr) ({ \ + typeof((ptr)+0) __opaque_ptr = (ptr); \ + asm ("" : "+r"(__opaque_ptr)); \ + __opaque_ptr; \ +}) + +static __force_inline uint32_t systick_value() { + return systick_hw->cvr; +} + +static __force_inline io_ro_32 *systick_value_ptr() { + return __get_opaque_ptr(&systick_hw->cvr); +} + +static int cycle_diff(uint32_t systick1, uint32_t systick2) { + static_assert(ARM_CPU_PREFIXED(SYST_CVR_CURRENT_LSB) == 0, ""); + uint32_t shift = 32 - ARM_CPU_PREFIXED(SYST_CVR_CURRENT_MSB); + return (((int32_t)((systick1 << shift) - (systick2 << shift))) >> shift) - 1; // -1 since the second systick read costs one +} + +#define timer_func_def(name) static __noinline int __not_in_flash_func(time_##name) + +static float f_a[] = {1.3f, -200.3f, 1.6e15f, 1e-2f}; +static float f_b[] = {-121.3f, 50.3f, 27.9f, 1.7e23f}; +static float f_c[] = {20.3f, -50.3f, -3.9e-3f, -4.1e7f}; +static float f_m1to1[] = {-0.5f, .9999f, 0.1f, -0.999999f}; + +static int32_t i_pow[] = {3,6,27,-10}; +static float f_positive[] = {0.0f, 3.7f, 1245325.f, 1e27f}; +static float f_1plus[] = {1.0f, 3.7f, 1245325.f, 1e27f}; + +static float f_smaller[] = {-1000.3f, 200.3f, 1.6e15f}; +static float f_bigger[] = {-121.3f, 5000.3f, 
1.6e16f}; + +static int32_t i_32[] = { 0, 3, -200, INT32_MIN, INT32_MAX }; +static int64_t i_64[] = { 0, 3, -200, 0x123456789abcll, -0x123456789abcll, INT64_MIN, INT64_MAX }; + +// bits for fixed point conversions +static int32_t n_32[] = { 0, 3, -3, 16, -16 }; + +static_assert(count_of(f_a) == count_of(f_b), ""); +static_assert(count_of(f_a) == count_of(f_c), ""); +static_assert(count_of(f_a) == count_of(i_pow), ""); +static_assert(count_of(f_a) == count_of(f_positive), ""); + +static_assert(count_of(f_smaller) == count_of(f_bigger), ""); + +static float time_unary_func(int (*timer)(float), float *f, uint count) { + float total = 0.f; + for (uint i=0;i=0.5f; x/=2.f) { printf("f2i64 %f->%lld\n", x, (int64_t)x); -#if PICO_RP2040 if ((double)x >= (double)INT64_MAX) { #if TEST_SATURATION test_assert(__aeabi_f2lz(x) == INT64_MAX); #endif } else { +#if PICO_RP2040 check1(__aeabi_f2lz, x); - } #else - check1_vfp_unwrapped(__aeabi_f2lz, x); + check1_vfp_unwrapped(__aeabi_f2lz, x); #endif + } } for(float x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { printf("f2i32 %f->%d\n", x, (int32_t)x); @@ -579,17 +579,17 @@ int main() { } for(float x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { printf("f2i32 %f->%d\n", x, (int32_t)x); -#if PICO_RP2040 if ((double)x >= (double)INT32_MAX) { #if TEST_SATURATION test_assert(__aeabi_f2iz(x) == INT32_MAX); #endif } else { +#if PICO_RP2040 check1(__aeabi_f2iz, x); - } #else - check1_vfp_unwrapped(__aeabi_f2iz, x); + check1_vfp_unwrapped(__aeabi_f2iz, x); #endif + } } for (float x = 1; x < 11; x += 2) {