mirror of https://github.com/novarobot/llama.cpp
Compare commits
No commits in common. 'master' and 'master-56e659a' have entirely different histories.
master...master-56e659a
download-pth.py (new file)
@@ -0,0 +1,66 @@
import os
import sys
from tqdm import tqdm
import requests

if len(sys.argv) < 3:
    print("Usage: download-pth.py dir-model model-type\n")
    print("  model-type: Available models 7B, 13B, 30B or 65B")
    sys.exit(1)

modelsDir = sys.argv[1]
model = sys.argv[2]

num = {
    "7B": 1,
    "13B": 2,
    "30B": 4,
    "65B": 8,
}

if model not in num:
    print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B")
    sys.exit(1)

print(f"Downloading model {model}")

files = ["checklist.chk", "params.json"]

for i in range(num[model]):
    files.append(f"consolidated.0{i}.pth")

resolved_path = os.path.abspath(os.path.join(modelsDir, model))
os.makedirs(resolved_path, exist_ok=True)

for file in files:
    dest_path = os.path.join(resolved_path, file)

    if os.path.exists(dest_path):
        print(f"Skip file download, it already exists: {file}")
        continue

    url = f"https://agi.gpt4.org/llama/LLaMA/{model}/{file}"
    response = requests.get(url, stream=True)
    with open(dest_path, 'wb') as f:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    t.update(len(chunk))

files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for file in files2:
    dest_path = os.path.join(modelsDir, file)

    if os.path.exists(dest_path):
        print(f"Skip file download, it already exists: {file}")
        continue

    url = f"https://agi.gpt4.org/llama/LLaMA/{file}"
    response = requests.get(url, stream=True)
    with open(dest_path, 'wb') as f:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    t.update(len(chunk))
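Aside: the two download loops in this script are identical apart from the URL prefix. A minimal sketch of a shared helper, not part of this commit and assuming the same os/requests/tqdm imports as above:

def download_file(url: str, dest_path: str) -> None:
    # Stream the response to disk in 1 KiB chunks behind a tqdm progress bar.
    response = requests.get(url, stream=True)
    response.raise_for_status()  # assumption: surface HTTP errors rather than saving an error page
    with open(dest_path, 'wb') as f:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=os.path.basename(dest_path)) as t:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
                    t.update(len(chunk))

Each loop body would then reduce to the existence check plus a single download_file(url, dest_path) call.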
tests/CMakeLists.txt
@@ -1,9 +1,4 @@
-function(llama_add_test source)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source})
-    target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils)
-    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
-endfunction()
-
-llama_add_test(test-quantize.c)
-llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
+set(TEST_TARGET test-tokenizer-0)
+add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils)
+add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
tests/test-quantize.c (deleted)
@@ -1,42 +0,0 @@
#include "ggml.h"
#undef NDEBUG
#include <assert.h>
#include <math.h>

int main(void) {
    #define QK 32
    float src[QK];
    uint8_t dst[24];
    int64_t hist[16];

    for (int i = 0; i < QK; i++) {
        src[i] = (float)(i + 1);
    }

    size_t size = ggml_quantize_q4_0(src, dst, QK, QK, QK, hist);
    assert(size == 20);
    float max_result = ((float *)dst)[0];
    float max_expected = src[31] / ((1 << 3) - 1);
    assert(max_result == max_expected);
    for (int i = 0; i < QK; i++) {
        uint8_t q4_result = (i % 2) ? (dst[sizeof(float) + i/2] >> 4) : (dst[sizeof(float) + i/2] & 0xF);
        uint8_t q4_expected = roundf(src[i] / max_expected) + 8;
        assert(q4_result == q4_expected);
    }

    size = ggml_quantize_q4_1(src, dst, QK, QK, QK, hist);
    assert(size == 24);
    float delta_result = ((float *)dst)[0];
    float delta_expected = (src[31] - src[0]) / ((1 << 4) - 1);
    assert(delta_result == delta_expected);
    float min_result = ((float *)dst)[1];
    float min_expected = src[0];
    assert(min_result == min_expected);
    for (int i = 0; i < QK; i++) {
        uint8_t q4_result = (i % 2) ? (dst[sizeof(float)*2 + i/2] >> 4) : (dst[sizeof(float)*2 + i/2] & 0xF);
        uint8_t q4_expected = roundf((src[i] - min_expected) / delta_expected);
        assert(q4_result == q4_expected);
    }

    return 0;
}
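For orientation, the deleted test pins down the early q4_0 and q4_1 block layouts: ggml_quantize_q4_0 packs 32 floats into 20 bytes (a 4-byte float scale followed by 16 bytes of 4-bit values biased by +8), and ggml_quantize_q4_1 into 24 bytes (scale and minimum floats, then the same 16 packed bytes). A minimal Python sketch, not from the repo, that decodes one q4_0 block under exactly the layout the asserts above check:

import struct

QK = 32  # values per quantization block, as in the test

def dequantize_q4_0(block: bytes) -> list[float]:
    # 20-byte block: little-endian float scale, then QK/2 packed bytes,
    # even-indexed value in the low nibble, odd-indexed in the high nibble.
    (scale,) = struct.unpack_from("<f", block, 0)
    values = []
    for byte in block[4:4 + QK // 2]:
        for q in (byte & 0xF, byte >> 4):
            values.append((q - 8) * scale)  # undo the +8 bias, rescale
    return values

A q4_1 decoder would instead read two leading floats (delta and minimum) and map each nibble to q * delta + minimum, mirroring the second half of the test.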