From 82654f1f5ab4e957c660f358e5cbb00d611e8d44 Mon Sep 17 00:00:00 2001
From: anzz1
Date: Wed, 22 Mar 2023 04:05:49 +0200
Subject: [PATCH] Enable F16C/CVT16 vector extensions on MSVC

__F16C__ macro does not exist in MSVC, but is implied with AVX2/AVX512
---
 ggml.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/ggml.c b/ggml.c
index 76d812f..b74b04b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -76,6 +76,16 @@ static int sched_yield (void) {
 typedef void* thread_ret_t;
 #endif
 
+// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
+#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
+#ifndef __FMA__
+#define __FMA__
+#endif
+#ifndef __F16C__
+#define __F16C__
+#endif
+#endif
+
 #ifdef __HAIKU__
 #define static_assert(cond, msg) _Static_assert(cond, msg)
 #endif
@@ -359,16 +369,9 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 
 #define QK 32
 
-#if __AVX2__ || __AVX512F__
-
-// __FMA__ is not defined in MSVC, however it is implied with AVX2/AVX512
-#if defined(_MSC_VER) && !defined(__FMA__)
-#define __FMA__
-#endif
-
 // AVX routines provided by GH user Const-me
 // ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600
-
+#if __AVX2__ || __AVX512F__
 // Unpack 32 4-bit fields into 32 bytes
 // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
 static inline __m256i bytesFromNibbles( const uint8_t* rsi )