diff --git a/RWKV-v4/run.py b/RWKV-v4/run.py
index 22fcc50..1c1c2fb 100644
--- a/RWKV-v4/run.py
+++ b/RWKV-v4/run.py
@@ -57,6 +57,11 @@ elif TOKEN_MODE == 'pile':
     # n_embd = 1024
     # ctx_len = 1024
 
+    # MODEL_NAME = 'RWKV-4-Pile-1B5-20220903-8040'
+    # n_layer = 24
+    # n_embd = 2048
+    # ctx_len = 1024
+
 os.environ['RWKV_FLOAT_MODE'] = 'fp32'  # 'bf16' / 'fp16' / 'fp32' (note: only using fp32 at this moment)
 os.environ['RWKV_RUN_DEVICE'] = 'cpu'   # 'cpu' (already very fast) or 'cuda'
 model_type = 'RWKV' # 'RWKV' or 'RWKV-ffnPre'
diff --git a/RWKV-v4/src/model.py b/RWKV-v4/src/model.py
index 7434ccb..e4faf89 100644
--- a/RWKV-v4/src/model.py
+++ b/RWKV-v4/src/model.py
@@ -278,7 +278,7 @@ class Block(nn.Module):
             self.ln0 = nn.LayerNorm(config.n_embd)
 
         if self.layer_id == 0 and self.config.model_type == 'RWKV-ffnPre':
-            self.ffnPre = RWKV_ChannelMix(config, layer_id+1000)
+            self.ffnPre = RWKV_ChannelMix(config, 0)
         else:
             self.att = RWKV_TimeMix(config, layer_id)