########################################################################################################
# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
########################################################################################################

# This script verifies the results of different model implementations and makes sure they agree
# with each other.
# NumPy is only used below for pretty-printing the model outputs.
import numpy as np

np.set_printoptions(precision=4, suppress=True, linewidth=200)

import os

# Pin the process to a single GPU. This must be set BEFORE torch is imported
# so that CUDA device enumeration respects it.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
RUN_DEVICE = 'cuda'

import torch

# Project-local model implementations: the inference-time RNN/GPT variants
# and the training-time GPT.
from src.model_run import RWKV_RNN, RWKV_GPT
from src.model import GPT, GPTConfig
# Model hyperparameters -- these must match the checkpoint being loaded.
ctx_len = 1024       # maximum context length
n_layer = 6          # number of layers
n_embd = 512         # embedding width
model_type = 'RWKV'  # model variant selector passed to the model constructors

# Checkpoint basename; '<model_name>.pth' is loaded below.
model_name = 'trained-1'

from src.utils import TOKENIZER

# Character-level tokenizer built from the 'vocab' file; characters missing
# from the vocabulary are mapped to a space.
tokenizer = TOKENIZER('vocab', UNKNOWN_CHAR=' ')
########################################################################################################

# Build the training-time GPT model and load the checkpoint weights onto it.
model_train = GPT(GPTConfig(tokenizer.vocab_size, ctx_len, model_type=model_type, n_layer=n_layer, n_embd=n_embd)).cuda()

print('loading ' + model_name)
m2 = torch.load(model_name + '.pth', map_location=RUN_DEVICE)
model_train.load_state_dict(m2)

# Inference-time variants that load the same checkpoint themselves: a stateful
# RNN formulation and a parallel GPT formulation. All three models should
# produce matching outputs for the same input.
model_rnn = RWKV_RNN(model_name, RUN_DEVICE, model_type, n_layer, n_embd, ctx_len)
model_gpt = RWKV_GPT(model_name, RUN_DEVICE, model_type, tokenizer.vocab_size, n_layer, n_embd, ctx_len).cuda()
########################################################################################################

# Encode a short prompt, character by character; characters missing from the
# vocabulary fall back to the tokenizer's UNKNOWN_CHAR id.
context = '\nIn a'
ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]
print(f'input len {len(ctx)} data {ctx}')
########################################################################################################

# 1) Parallel GPT-style inference over the whole prompt at once.
print('\nRWKV-GPT output')
out = model_gpt.forward(torch.tensor(ctx).unsqueeze(0).cuda())[0].detach().cpu().numpy()
print(out)

# 2) Sequential RNN-style inference, feeding the prompt one token at a time.
#    The per-step outputs should match the GPT logits position by position.
print('\nRWKV-RNN output')
model_rnn.clear()
src_len = len(ctx)
for i in range(src_len):
    x = ctx[:i+1]
    out = model_rnn.run(x)
    # Only print the first and last few steps to keep the output readable.
    if i < 3 or i >= src_len - 3:
        print(torch.tensor(out).detach().cpu().numpy())
    if i == 2:
        print('...')

# 3) Training-time model forward pass over the same prompt.
print('\nRWKV-train output')
ctx += [0] * (ctx_len - src_len)  # pad to ctx_len
ctx = [ctx] * 4  # increase batch size (to make it work with B_GROUP_FORWARD & B_GROUP_BACKWARD)
# Take batch element 0 and only the positions covered by the real prompt.
out = model_train.forward(torch.tensor(ctx).cuda())[0][0][:src_len].detach().cpu().numpy()
print(out, '\n')