# @@ -4,16 +4,18 @@  (stray unified-diff hunk header left over from a patch; commented out so the file parses)
########################################################################################################
########################################################################################################
import numpy as np
import numpy as np
import math
import time
import time
import types
import types
import copy
import copy
import torch
import torch
from torch . nn import functional as F
from torch . nn import functional as F
from src . utils import TOKENIZER
from src . utils import TOKENIZER , Dataset
from src . model_run import RWKV_RNN
from src . model_run import RWKV_RNN
# Speed over exactness for GPU inference: enable the cuDNN autotuner and the
# TF32 fast paths (cuDNN + cuBLAS matmul). TF32 trades a little mantissa
# precision for large throughput gains on Ampere+ GPUs.
# (Each statement appeared twice in the pasted diff; deduplicated here.)
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True
# Compact numpy printing for the loss/probability dumps below.
np.set_printoptions(precision=4, suppress=True, linewidth=200)
### Step 1: set model ##################################################################################

# Architecture variant, reconstructed from the diff hunk-header context line.
model_type = 'RWKV'  # 'RWKV' or 'RWKV-ffnPre'

# Checkpoint name (no extension) and the .json vocab generated by train.py.
MODEL_NAME = 'trained-31'
WORD_NAME = 'vocab'

# ####### Uncomment these to test my 27M params enwik8 model ##########
# MODEL_NAME = 'enwik8-ppl1.65-6064-1024-RWKV-6-512-2022-03-25-21-05-13'
# WORD_NAME = 'enwik8-vocab'
# EVAL_DATA = 'enwik8'  # uncomment this for EVAL MODE (no text generation)
# ########################################################################

# --> set UNKNOWN_CHAR to the rarest token in your vocab.json <--
# --> all unknown tokens in your context will be denoted by it <--
# Reconstructed from the diff hunk-header context line: nucleus-sampling top-p
# applied to the newline token during generation.
top_p_newline = 0.9
########################################################################################################
########################################################################################################
np.set_printoptions(precision=4, suppress=True, linewidth=200)

# Build the RNN-mode model and the character tokenizer once, up front.
# (The tokenizer line appeared twice in the pasted diff; deduplicated here.)
print(f'Loading {MODEL_NAME}...')
model = RWKV_RNN(MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd, ctx_len)
tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)
########################################################################################################
# EVAL MODE: if EVAL_DATA was defined above, measure the model's average
# cross-entropy loss on that corpus instead of generating text, then exit.
if 'EVAL_DATA' in vars() or 'EVAL_DATA' in globals():
    print('Evaluating on ' + EVAL_DATA + ' ...')
    # Read the whole evaluation corpus; use `with` so the handle is closed
    # (the original leaked the file object).
    with open(EVAL_DATA, "r", encoding='utf-8') as eval_file:
        data = eval_file.read()
    # loss_table[i] accumulates, across samples, the loss of predicting the
    # token that follows a context of length i+1.
    loss_table = np.zeros(ctx_len)
    N_SAMPLE = 1000
    for iii in range(N_SAMPLE):
        # Random window of ctx_len+1 characters: ctx_len of context plus one target per step.
        pos = np.random.randint(0, len(data) - ctx_len - 1)
        context = data[pos:pos + ctx_len + 1]
        # Map characters to token ids.
        # NOTE(review): the .get() fallback is tokenizer.UNKNOWN_CHAR itself —
        # confirm that is a token id (not a raw character), otherwise the
        # prob[ctx[i]] lookup below would mis-index. TODO confirm against TOKENIZER.
        ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]
        model.clear()  # reset the RNN hidden state for the new sample
        for i in range(1, ctx_len + 1):
            x = ctx[:i]
            out = model.run(x)
            # Negative log-likelihood of the true next token ctx[i].
            prob = F.softmax(torch.tensor(out), dim=-1)
            loss_table[i - 1] += -math.log(prob[ctx[i]])
        # Running average: mean over positions, divided by number of samples so far.
        print(f'Tested {iii+1} samples: avg_loss over ctx_len =',
              np.mean(loss_table) / (iii + 1))
    exit(0)
########################################################################################################
# Normalize the user prompt via the tokenizer before generation.
# (Duplicated prints and a redundant second model construction — leftovers of
# the stripped diff; the model is already built right after the tokenizer above.)
context = tokenizer.refine_context(context)
print('\nYour prompt has ' + str(len(context)) + ' tokens.')
print('\n--> Currently the first run takes a while if your prompt is long, as we are using RNN to process the prompt. This will be much faster in future versions. <--\n')
for TRIAL in range ( 1 if DEBUG_DEBUG else NUM_TRIALS ) :
for TRIAL in range ( 1 if DEBUG_DEBUG else NUM_TRIALS ) :
t_begin = time . time_ns ( )
t_begin = time . time_ns ( )