Enable Fused-Multiply-Add (FMA) instructions on MSVC

MSVC does not define the __FMA__ macro; FMA support is implied by AVX2/AVX512, so define __FMA__ manually when building with MSVC.
llama-patch-enable-fma-msvc
anzz1 3 years ago committed by GitHub
parent da0e9fe90c
commit cf5b08482f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -359,9 +359,16 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
#define QK 32
#if __AVX2__ || __AVX512F__
// __FMA__ is not defined in MSVC, however it is implied with AVX2/AVX512
#if defined(_MSC_VER) && !defined(__FMA__)
#define __FMA__
#endif
// AVX routines provided by GH user Const-me
// ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
#if __AVX2__ || __AVX512F__
// Unpack 32 4-bit fields into 32 bytes
// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
static inline __m256i bytesFromNibbles( const uint8_t* rsi )

Loading…
Cancel
Save