diff --git a/src/rp2_common/pico_double/CMakeLists.txt b/src/rp2_common/pico_double/CMakeLists.txt index c038db86..2d5ac90e 100644 --- a/src/rp2_common/pico_double/CMakeLists.txt +++ b/src/rp2_common/pico_double/CMakeLists.txt @@ -23,68 +23,84 @@ if (NOT TARGET pico_double) $>,$,${PICO_DEFAULT_DOUBLE_IMPL}>) function(wrap_double_functions TARGET) - pico_wrap_function(${TARGET} __aeabi_dadd) - pico_wrap_function(${TARGET} __aeabi_ddiv) - pico_wrap_function(${TARGET} __aeabi_dmul) - pico_wrap_function(${TARGET} __aeabi_drsub) - pico_wrap_function(${TARGET} __aeabi_dsub) - pico_wrap_function(${TARGET} __aeabi_cdcmpeq) - pico_wrap_function(${TARGET} __aeabi_cdrcmple) - pico_wrap_function(${TARGET} __aeabi_cdcmple) - pico_wrap_function(${TARGET} __aeabi_dcmpeq) - pico_wrap_function(${TARGET} __aeabi_dcmplt) - pico_wrap_function(${TARGET} __aeabi_dcmple) - pico_wrap_function(${TARGET} __aeabi_dcmpge) - pico_wrap_function(${TARGET} __aeabi_dcmpgt) - pico_wrap_function(${TARGET} __aeabi_dcmpun) - pico_wrap_function(${TARGET} __aeabi_i2d) - pico_wrap_function(${TARGET} __aeabi_l2d) - pico_wrap_function(${TARGET} __aeabi_ui2d) - pico_wrap_function(${TARGET} __aeabi_ul2d) - pico_wrap_function(${TARGET} __aeabi_d2iz) - pico_wrap_function(${TARGET} __aeabi_d2lz) - pico_wrap_function(${TARGET} __aeabi_d2uiz) - pico_wrap_function(${TARGET} __aeabi_d2ulz) - pico_wrap_function(${TARGET} __aeabi_d2f) - pico_wrap_function(${TARGET} sqrt) - pico_wrap_function(${TARGET} cos) - pico_wrap_function(${TARGET} sin) - pico_wrap_function(${TARGET} tan) - pico_wrap_function(${TARGET} atan2) - pico_wrap_function(${TARGET} exp) - pico_wrap_function(${TARGET} log) - - pico_wrap_function(${TARGET} ldexp) - pico_wrap_function(${TARGET} copysign) - pico_wrap_function(${TARGET} trunc) - pico_wrap_function(${TARGET} floor) - pico_wrap_function(${TARGET} ceil) - pico_wrap_function(${TARGET} round) - pico_wrap_function(${TARGET} sincos) # gnu - pico_wrap_function(${TARGET} asin) - 
pico_wrap_function(${TARGET} acos) - pico_wrap_function(${TARGET} atan) - pico_wrap_function(${TARGET} sinh) - pico_wrap_function(${TARGET} cosh) - pico_wrap_function(${TARGET} tanh) - pico_wrap_function(${TARGET} asinh) - pico_wrap_function(${TARGET} acosh) - pico_wrap_function(${TARGET} atanh) - pico_wrap_function(${TARGET} exp2) - pico_wrap_function(${TARGET} log2) - pico_wrap_function(${TARGET} exp10) - pico_wrap_function(${TARGET} log10) - pico_wrap_function(${TARGET} pow) - pico_wrap_function(${TARGET} powint) #gnu - pico_wrap_function(${TARGET} hypot) - pico_wrap_function(${TARGET} cbrt) - pico_wrap_function(${TARGET} fmod) - pico_wrap_function(${TARGET} drem) - pico_wrap_function(${TARGET} remainder) - pico_wrap_function(${TARGET} remquo) - pico_wrap_function(${TARGET} expm1) - pico_wrap_function(${TARGET} log1p) - pico_wrap_function(${TARGET} fma) + cmake_parse_arguments(WRAP_DOUBLE "NO_AEABI_ARITHMETIC;NO_AEABI_CMP;NO_AEABI_CONV_32;NO_AEABI_CONV_64;NO_AEABI_CONV_FLOAT;NO_SQRT;NO_SCI;NO_SCI_EXTRA" "" "" ${ARGN} ) + if (NOT WRAP_DOUBLE_NO_AEABI_ARITHMETIC) + pico_wrap_function(${TARGET} __aeabi_dadd) + pico_wrap_function(${TARGET} __aeabi_ddiv) + pico_wrap_function(${TARGET} __aeabi_dmul) + pico_wrap_function(${TARGET} __aeabi_drsub) + pico_wrap_function(${TARGET} __aeabi_dsub) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CMP) + pico_wrap_function(${TARGET} __aeabi_cdcmpeq) + pico_wrap_function(${TARGET} __aeabi_cdrcmple) + pico_wrap_function(${TARGET} __aeabi_cdcmple) + pico_wrap_function(${TARGET} __aeabi_dcmpeq) + pico_wrap_function(${TARGET} __aeabi_dcmplt) + pico_wrap_function(${TARGET} __aeabi_dcmple) + pico_wrap_function(${TARGET} __aeabi_dcmpge) + pico_wrap_function(${TARGET} __aeabi_dcmpgt) + pico_wrap_function(${TARGET} __aeabi_dcmpun) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_32) + pico_wrap_function(${TARGET} __aeabi_i2d) + pico_wrap_function(${TARGET} __aeabi_ui2d) + pico_wrap_function(${TARGET} __aeabi_d2iz) + pico_wrap_function(${TARGET} 
__aeabi_d2uiz) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_64) + pico_wrap_function(${TARGET} __aeabi_l2d) + pico_wrap_function(${TARGET} __aeabi_ul2d) + pico_wrap_function(${TARGET} __aeabi_d2lz) + pico_wrap_function(${TARGET} __aeabi_d2ulz) + endif() + if (NOT WRAP_DOUBLE_NO_AEABI_CONV_FLOAT) + pico_wrap_function(${TARGET} __aeabi_d2f) + endif() + if (NOT WRAP_DOUBLE_NO_SQRT) + pico_wrap_function(${TARGET} sqrt) + endif() + if (NOT WRAP_DOUBLE_NO_SCI) + pico_wrap_function(${TARGET} cos) + pico_wrap_function(${TARGET} sin) + pico_wrap_function(${TARGET} tan) + pico_wrap_function(${TARGET} atan2) + pico_wrap_function(${TARGET} exp) + pico_wrap_function(${TARGET} log) + endif() + if (NOT WRAP_DOUBLE_NO_SCI_EXTRA) + pico_wrap_function(${TARGET} ldexp) + pico_wrap_function(${TARGET} copysign) + pico_wrap_function(${TARGET} trunc) + pico_wrap_function(${TARGET} floor) + pico_wrap_function(${TARGET} ceil) + pico_wrap_function(${TARGET} round) + pico_wrap_function(${TARGET} sincos) # gnu + pico_wrap_function(${TARGET} asin) + pico_wrap_function(${TARGET} acos) + pico_wrap_function(${TARGET} atan) + pico_wrap_function(${TARGET} sinh) + pico_wrap_function(${TARGET} cosh) + pico_wrap_function(${TARGET} tanh) + pico_wrap_function(${TARGET} asinh) + pico_wrap_function(${TARGET} acosh) + pico_wrap_function(${TARGET} atanh) + pico_wrap_function(${TARGET} exp2) + pico_wrap_function(${TARGET} log2) + pico_wrap_function(${TARGET} exp10) + pico_wrap_function(${TARGET} log10) + pico_wrap_function(${TARGET} pow) + pico_wrap_function(${TARGET} powint) #gnu + pico_wrap_function(${TARGET} hypot) + pico_wrap_function(${TARGET} cbrt) + pico_wrap_function(${TARGET} fmod) + pico_wrap_function(${TARGET} drem) + pico_wrap_function(${TARGET} remainder) + pico_wrap_function(${TARGET} remquo) + pico_wrap_function(${TARGET} expm1) + pico_wrap_function(${TARGET} log1p) + pico_wrap_function(${TARGET} fma) + endif() endfunction() pico_add_library(pico_double_pico) @@ -96,8 +112,8 @@ if (NOT TARGET 
pico_double) ${CMAKE_CURRENT_LIST_DIR}/double_v1_rom_shim_rp2040.S ) target_link_libraries(pico_double_pico INTERFACE pico_bootrom pico_double_headers hardware_divider) - wrap_double_functions(pico_double_pico) - elseif(NOT PICO_RISCV) + wrap_double_functions(pico_double_pico) # wrap everything + elseif(PICO_RP2350 AND NOT PICO_RISCV) pico_add_library(pico_double_pico_dcp) target_sources(pico_double_pico_dcp INTERFACE ${CMAKE_CURRENT_LIST_DIR}/double_math.c @@ -107,11 +123,10 @@ if (NOT TARGET pico_double) ${CMAKE_CURRENT_LIST_DIR}/double_conv_m33.S ) target_link_libraries(pico_double_pico_dcp INTERFACE pico_double_headers) - wrap_double_functions(pico_double_pico_dcp) + wrap_double_functions(pico_double_pico_dcp) #wrap everything target_link_libraries(pico_double_pico INTERFACE pico_double_pico_dcp) endif() - pico_add_library(pico_double_none) target_sources(pico_double_none INTERFACE ${CMAKE_CURRENT_LIST_DIR}/double_none.S diff --git a/src/rp2_common/pico_double/include/pico/double.h b/src/rp2_common/pico_double/include/pico/double.h index 188c34f7..9afce8bb 100644 --- a/src/rp2_common/pico_double/include/pico/double.h +++ b/src/rp2_common/pico_double/include/pico/double.h @@ -84,7 +84,10 @@ extern "C" { * * - GNU extensions: * -* powint, sincos +* sincos +* +* Additional functions on Arm: +* powint * * On Arm, the following additional optimized functions are also provided when using `pico_double_pico`, all of which * saturate to the nearest representable value for too large input when converting from floating point types: @@ -129,9 +132,33 @@ extern "C" { * On RISC-V there is no custom double-precision floating point support, so `pico_double_pico` is equivalent to `pico_double_compiler` * \endif */ + +// === we always define these +#define PICO_DOUBLE_HAS_INT32_TO_DOUBLE_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_INT64_TO_DOUBLE_CONVERSIONS 1 +// rounding towards zero +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT32_Z_CONVERSIONS 1 +#define 
PICO_DOUBLE_HAS_DOUBLE_TO_INT64_Z_CONVERSIONS 1 +// === + +// PICO_CONFIG: PICO_DOUBLE_IN_RAM, Force placement of SDK provided double-precision floating point into RAM, type=bool, default=0, group=pico_float #if !defined(__riscv) || PICO_COMBINED_DOCS #if PICO_COMBINED_DOCS || !LIB_PICO_DOUBLE_COMPILER +#define PICO_DOUBLE_HAS_FIX32_TO_DOUBLE_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_FIX64_TO_DOUBLE_CONVERSIONS 1 +// rounding towards zero +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX32_Z_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX64_Z_CONVERSIONS 1 + +// rounding towards negative infinity +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT32_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_INT64_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX32_M_CONVERSIONS 1 +#define PICO_DOUBLE_HAS_DOUBLE_TO_FIX64_M_CONVERSIONS 1 + +#define PICO_DOUBLE_HAS_POWINT 1 + double int2double(int32_t i); double uint2double(uint32_t i); double int642double(int64_t i); @@ -143,8 +170,8 @@ double ufix642double(uint64_t m, int e); // These methods round towards 0, which IS the C way int32_t double2int_z(double f); -int64_t double2int64_z(double f); int32_t double2uint_z(double f); +int64_t double2int64_z(double f); int64_t double2uint64_z(double f); int32_t double2fix_z(double f, int e); uint32_t double2ufix_z(double f, int e); @@ -162,18 +189,31 @@ uint32_t double2ufix(double f, int e); int64_t double2fix64(double f, int e); uint64_t double2ufix64(double f, int e); +double powint(double x, int y); #endif double exp10(double x); +#if PICO_C_COMPILER_IS_CLANG && !LIB_PICO_DOUBLE_COMPILER +// clang unhelpfully splits sincos into explicit calls to sin & cos +extern void WRAPPER_FUNC(sincos)(double x, double *sinx, double *cosx); +#define sincos(x, sinx, cosx) WRAPPER_FUNC(sincos)(x, sinx, cosx) +#else void sincos(double x, double *sinx, double *cosx); -double powint(double x, int y); +#endif + #if PICO_RP2350 || PICO_COMBINED_DOCS + +#if LIB_PICO_DOUBLE_PICO_DCP +#define PICO_DOUBLE_HAS_DDIV_FAST 1 
+#define PICO_DOUBLE_HAS_SQRT_FAST 1 +#define PICO_DOUBLE_HAS_FMA_FAST 1 double ddiv_fast(double n, double d); double sqrt_fast(double f); double fma_fast(double x, double y, double z); // this is not fused double mla(double x, double y, double z); // another name for fma_fast #endif +#endif #endif @@ -188,6 +228,14 @@ static inline int32_t double2int_z(double d) { return (int32_t)d; } static inline int64_t double2int64_z(double d) { return (int64_t)d; } static inline int32_t double2uint_z(double d) { return (uint32_t)d; } static inline int64_t double2uint64_z(double d) { return (uint64_t)d; } + +#if __has_builtin(__builtin_powi) +#define PICO_DOUBLE_HAS_POWINT 1 +static __force_inline double powint(double d, int32_t p) { + return __builtin_powi(d, p); +} +#endif + #endif #ifdef __cplusplus diff --git a/src/rp2_common/pico_float/CMakeLists.txt b/src/rp2_common/pico_float/CMakeLists.txt index 28eba6eb..a2474d22 100644 --- a/src/rp2_common/pico_float/CMakeLists.txt +++ b/src/rp2_common/pico_float/CMakeLists.txt @@ -174,6 +174,8 @@ #NO_SCI_EXTRA # todo - are our versions better than what GCC proides? 
NO_FMAF # direct VFP instruction support ) + # this allows inlining of sqrtf for example - if you really want errno support, use pico_float_compiler + target_compile_options(pico_float_pico_vfp INTERFACE -fno-math-errno) target_link_libraries(pico_float_pico INTERFACE pico_float_pico_vfp) diff --git a/src/rp2_common/pico_float/include/pico/float.h b/src/rp2_common/pico_float/include/pico/float.h index 1f5e3a64..1a01df02 100644 --- a/src/rp2_common/pico_float/include/pico/float.h +++ b/src/rp2_common/pico_float/include/pico/float.h @@ -93,7 +93,11 @@ extern "C" { * * - GNU extensions: * -* powintf, sincosf +* sincosf +* +* Additional functions on Arm: +* +* powintf * * On Arm, the following additional optimized functions are also provided (when using `_pico` variants of `pico_float`), all of which * saturate to the nearest representable value for too large input when converting from floating point types: @@ -150,10 +154,32 @@ extern "C" { * \endif */ +// === we always define these +#define PICO_FLOAT_HAS_INT32_TO_FLOAT_CONVERSIONS 1 +#define PICO_FLOAT_HAS_INT64_TO_FLOAT_CONVERSIONS 1 +// rounding towards zero +#define PICO_FLOAT_HAS_FLOAT_TO_INT32_Z_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_INT64_Z_CONVERSIONS 1 +// === + // PICO_CONFIG: PICO_FLOAT_IN_RAM, Force placement of SDK provided single-precision floating point into RAM, type=bool, default=0, group=pico_float #if !defined(__riscv) || PICO_COMBINED_DOCS #if PICO_COMBINED_DOCS || !LIB_PICO_FLOAT_COMPILER +#define PICO_FLOAT_HAS_FIX32_TO_FLOAT_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FIX64_TO_FLOAT_CONVERSIONS 1 +// rounding towards zero +#define PICO_FLOAT_HAS_FLOAT_TO_FIX32_Z_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_FIX64_Z_CONVERSIONS 1 + +// rounding towards negative infinity +#define PICO_FLOAT_HAS_FLOAT_TO_INT32_M_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_INT64_M_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_FIX32_M_CONVERSIONS 1 +#define PICO_FLOAT_HAS_FLOAT_TO_FIX64_M_CONVERSIONS 1 + 
+#define PICO_FLOAT_HAS_POWINTF 1 + #if LIB_PICO_FLOAT_PICO_VFP // note these functions do still exist for assembler use, we would just prefer to let the compiler handle it for C/C++ to avoid a call static inline float int2float(int32_t i) { return (float)i; } @@ -164,6 +190,7 @@ float uint2float(uint32_t i); #endif float int642float(int64_t i); float uint642float(uint64_t i); + float fix2float(int32_t m, int e); float ufix2float(uint32_t m, int e); float fix642float(int64_t m, int e); @@ -196,6 +223,8 @@ uint32_t float2ufix(float f, int e); int64_t float2fix64(float f, int e); uint64_t float2ufix64(float f, int e); +float powintf(float x, int y); + #if LIB_PICO_FLOAT_PICO_VFP // a bit of a hack to inline VFP fixed point conversion when exponent is constant and in range 1-32 #define fix2float(m, e) __builtin_choose_expr(__builtin_constant_p(e), (e) >= 1 && (e) <= 32 ? _fix2float_inline(m, e) : fix2 ## float(m, e), fix2 ## float(m, e)) @@ -293,10 +322,17 @@ uint64_t float2ufix64(float f, int e); #endif float exp10f(float x); +#if PICO_C_COMPILER_IS_CLANG && !LIB_PICO_FLOAT_COMPILER +// clang unhelpfully splits sincosf into explicit calls to sin & cos +extern void WRAPPER_FUNC(sincosf)(float x, float *sinx, float *cosx); +#define sincosf(x, sinx, cosx) WRAPPER_FUNC(sincosf)(x, sinx, cosx) +#else void sincosf(float x, float *sinx, float *cosx); -float powintf(float x, int y); +#endif -#if PICO_RP2350 || PICO_COMBINED_DOCS +#if (PICO_RP2350 && LIB_PICO_FLOAT_PICO_DCP) || PICO_COMBINED_DOCS +#define PICO_FLOAT_HAS_FDIV_FAST 1 +#define PICO_FLOAT_HAS_SQRTF_FAST 1 float fdiv_fast(float n, float d); float sqrtf_fast(float f); #endif @@ -315,6 +351,13 @@ static inline int32_t float2int_z(float f) { return (int32_t)f; } static inline int64_t float2int64_z(float f) { return (int64_t)f; } static inline int32_t float2uint_z(float f) { return (uint32_t)f; } static inline int64_t float2uint64_z(float f) { return (uint64_t)f; } + +#if __has_builtin(__builtin_powif) +#define 
PICO_FLOAT_HAS_POWINTF 1 +static __force_inline float powintf(float f, int32_t p) { + return __builtin_powif(f, p); +} +#endif #endif #ifdef __cplusplus