Add support for bigger models.

I've tested with 13B LLaMA model and it seems to work.

There was a bug in the unpickler that skipped over tuples of size 1. I had
written a bunch of code on the assumption that there was no bug; I have now
fixed the bug and removed some of the unpickling code.

I added functions to tensor.rs to be able to construct tensors out of
multiple files.
broken-opencl-code
Mikko Juola 3 years ago
parent 8a427bcb21
commit 26d5309cf7

@ -8,11 +8,16 @@ As of writing of this, this can run LLaMA-7B at around ~1 token per second, on
a Ryzen 3950X using something like 1.5 threads because I haven't yet properly a Ryzen 3950X using something like 1.5 threads because I haven't yet properly
figured out how to multithread this. figured out how to multithread this.
It uses AVX2 intrinsics to speed up itself. Therefore, you need an x86-family I've also managed to run LLaMA-13B which just barely fits in my 64-gig machine
with 32-bit float weights everywhere.
I have not tried the bigger models yet.
This uses AVX2 intrinsics to speed up itself. Therefore, you need an x86-family
CPU to run this. CPU to run this.
It has a Python unpickler that understands the `.pth` files used by PyTorch. It also has a Python unpickler that understands the `.pth` files used by
Well sort of, it doesn't unzip them automatically (see below). PyTorch. Well almost, it doesn't unzip them automatically (see below).
# How to run # How to run
@ -27,16 +32,18 @@ decompress it.
$ cd LLaMA $ cd LLaMA
$ cd 7B $ cd 7B
$ unzip consolidated.00.pth $ unzip consolidated.00.pth
# Only necessary for LLaMA-7B; rllama currently expects .00, .01, .02 etc. in directory names
$ mv consolidated consolidated.00
``` ```
You should then be ready to generate some text. You should then be ready to generate some text.
```shell ```shell
cargo run --release -- --tokenizer-model /path/to/tokenizer.model --model-path /path/to/LLaMA/7B/consolidated/data.pkl --prompt "The meaning of life is" cargo run --release -- --tokenizer-model /path/to/tokenizer.model --model-path /path/to/LLaMA/7B --param-path /path/to/LLaMA/7B/params.json --prompt "The meaning of life is"
``` ```
Right now it seems to use around ~25 gigabytes of memory. Internally all Right now it seems to use around ~25 gigabytes of memory for 7B and around ~50
weights are cast to 32-bit floats. gigabytes for 13B. Internally all weights are cast to 32-bit floats.
You can use `--temperature`, `--top-p` and `--top-k` to adjust token sampler You can use `--temperature`, `--top-p` and `--top-k` to adjust token sampler
settings. settings.

@ -1,4 +1,4 @@
use crate::tensor::Tensor; use crate::tensor::{FromPiecesDirection, Tensor, TensorBuilder};
use crate::unpickler; use crate::unpickler;
use crate::unpickler::*; use crate::unpickler::*;
use std::collections::BTreeMap; use std::collections::BTreeMap;
@ -10,12 +10,14 @@ pub struct Embedding {
impl Embedding { impl Embedding {
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
data_dir: P, data_dir: P,
) -> Result<Self, UnpicklingError> { ) -> Result<Self, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref(); let data_dir: &Path = data_dir.as_ref();
let val = match unpickled.get_str_key("tok_embeddings.weight") { let mut builders: Vec<TensorBuilder> = vec![];
for unpickle in unpickled.iter() {
let val = match unpickle.get_str_key("tok_embeddings.weight") {
Some(val) => val, Some(val) => val,
None => { None => {
return Err(UnpicklingError::MissingField( return Err(UnpicklingError::MissingField(
@ -23,11 +25,14 @@ impl Embedding {
)) ))
} }
}; };
let tensor = val builders.push(
.to_tensor_builder() val.to_tensor_builder()
.ok_or(UnpicklingError::InvalidTensorData)?; .ok_or(UnpicklingError::InvalidTensorData)?,
let tensor = tensor.load(data_dir)?; );
}
let tensor =
TensorBuilder::load_from_pieces(&builders, data_dir, FromPiecesDirection::Cols)?;
let num_embeddings = tensor.rows(); let num_embeddings = tensor.rows();
let mut table: BTreeMap<usize, Tensor> = BTreeMap::new(); let mut table: BTreeMap<usize, Tensor> = BTreeMap::new();

@ -3,10 +3,12 @@ use crate::token_sampler::TokenSampler;
use crate::tokenizer::{TokenId, Tokenizer}; use crate::tokenizer::{TokenId, Tokenizer};
use crate::transformer::Transformer; use crate::transformer::Transformer;
use crate::unpickler; use crate::unpickler;
use crate::unpickler::Value;
use clap::Parser; use clap::Parser;
use colored::Colorize; use colored::Colorize;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::path::PathBuf;
#[derive(Parser)] #[derive(Parser)]
#[command(author, version, about, long_about = None)] #[command(author, version, about, long_about = None)]
@ -94,37 +96,53 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
pln!("Starting up. Loading tokenizer from {}...", tokenizer_path); pln!("Starting up. Loading tokenizer from {}...", tokenizer_path);
let tok = Tokenizer::load(tokenizer_path.as_str())?; let tok = Tokenizer::load(tokenizer_path.as_str())?;
pln!("Tokenizer loaded. Loading model from {}...", model_path); pln!("Tokenizer loaded. Loading model from {}...", model_path);
let mut fs = std::fs::File::open(model_path.as_str())?;
let mut unpickle_results: Vec<Value> = vec![];
let mut part: usize = 0;
loop {
let model_path: PathBuf = model_path.clone().into();
let base_path = model_path.join(format!("consolidated.{:02}", part));
// The data file is in consolidated.XX/data.pkl where XX is the part number.
let full_path = base_path.join("data.pkl");
let mut fs = match std::fs::File::open(&full_path) {
Ok(fs) => fs,
Err(err) => {
if err.kind() == std::io::ErrorKind::NotFound {
break;
} else {
return Err(err.into());
}
}
};
let mut bs = Vec::new(); let mut bs = Vec::new();
fs.read_to_end(&mut bs)?; fs.read_to_end(&mut bs)?;
std::mem::drop(fs); std::mem::drop(fs);
pln!("Read data.pkl from path {}", full_path.display());
// We chop off file name from model_path and append "data/"
let model_data_dir = model_path
.split('/')
.take(model_path.split('/').count() - 1)
.collect::<Vec<&str>>()
.join("/")
+ "/data/";
let result = unpickler::unpickle(&bs)?; let result = unpickler::unpickle(&bs)?;
pln!("Loading embeddings from {}...", model_data_dir); unpickle_results.push(result);
let emb = Embedding::from_unpickled(&result, model_data_dir.clone())?; part += 1;
}
pln!("Loading embeddings from {}...", model_path);
let emb = Embedding::from_unpickled(&unpickle_results, model_path.clone())?;
let max_seq_len = match cli.max_seq_len { let max_seq_len = match cli.max_seq_len {
Some(max_seq_len) => max_seq_len, Some(max_seq_len) => max_seq_len,
None => 1024, None => 1024,
}; };
pln!("Loading transformer weights from {}...", model_data_dir); pln!("Loading transformer weights from {}...", model_path);
let tr = Transformer::from_unpickled( let tr = Transformer::from_unpickled(
&result, &unpickle_results,
emb, emb,
params.dim, params.dim,
params.n_layers, params.n_layers,
params.n_heads, params.n_heads,
max_seq_len, max_seq_len,
params.norm_eps, params.norm_eps,
model_data_dir, model_path,
)?; )?;
pln!("All is loaded. Starting inference."); pln!("All is loaded. Starting inference.");

@ -5,7 +5,7 @@ use rand::Rng;
use rayon::prelude::*; use rayon::prelude::*;
use std::alloc::Layout; use std::alloc::Layout;
use std::arch::x86_64::*; use std::arch::x86_64::*;
use std::io::Read; use std::io::{Read, Seek};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use thiserror::Error; use thiserror::Error;
@ -17,6 +17,7 @@ pub struct TensorBuilder {
pub(crate) rows: i64, pub(crate) rows: i64,
pub(crate) cols: i64, pub(crate) cols: i64,
pub(crate) nitems: i64, pub(crate) nitems: i64,
pub(crate) offset: i64,
} }
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
@ -29,12 +30,20 @@ pub enum TensorDType {
pub enum TensorError { pub enum TensorError {
#[error("IO error: {0}")] #[error("IO error: {0}")]
IOError(#[from] std::io::Error), IOError(#[from] std::io::Error),
#[error("IOError while reading tensor: {0} {1}")]
TensorBuilderReadError(std::io::Error, String),
#[error("Invalid stride: {0}")] #[error("Invalid stride: {0}")]
InvalidStride(i64), InvalidStride(i64),
#[error("Tried to build a tensor from zero files")]
TensorBuilderEmpty,
#[error("Tried to build a tensor from multiple files but the number of rows do not agree between the files. {0} != {1}")]
TensorBuilderRowsMismatch(i64, i64),
#[error("Tried to build a tensor from multiple files but the data types do not agree between the files. {0:?} != {1:?}")]
TensorBuilderDTypeMismatch(TensorDType, TensorDType),
} }
impl TensorDType { impl TensorDType {
fn bytes_per_item(&self) -> usize { pub fn bytes_per_item(&self) -> usize {
match self { match self {
Self::Float16 => 2, Self::Float16 => 2,
Self::Float32 => 4, Self::Float32 => 4,
@ -118,6 +127,28 @@ impl Tensor {
Ok(val) Ok(val)
} }
pub fn from_unpickled_pieces<P: AsRef<Path>, S: AsRef<str>>(
unpickled: &[unpickler::Value],
name: S,
data_dir: P,
direction: FromPiecesDirection,
) -> Result<Tensor, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref();
let name: &str = name.as_ref();
let mut builders = Vec::new();
for unpickle in unpickled.iter() {
let val = unpickle
.get_str_key(name)
.ok_or(UnpicklingError::MissingField(name.to_string()))?;
let val = val
.to_tensor_builder()
.ok_or(UnpicklingError::InvalidTensorData)?;
builders.push(val);
}
let val = TensorBuilder::load_from_pieces(&builders, data_dir, direction)?;
Ok(val)
}
pub fn rows(&self) -> i64 { pub fn rows(&self) -> i64 {
self.rows self.rows
} }
@ -412,10 +443,16 @@ impl Tensor {
pub fn hadamard_product_broadcast(&self, other: &Tensor) -> Tensor { pub fn hadamard_product_broadcast(&self, other: &Tensor) -> Tensor {
if self.cols != other.cols { if self.cols != other.cols {
panic!("Invalid hadamard product broadcast"); panic!(
"Invalid hadamard product broadcast: {}x{} vs {}x{}",
self.rows, self.cols, other.rows, other.cols
);
} }
if other.rows != 1 { if other.rows != 1 {
panic!("Invalid hadamard product broadcast"); panic!(
"Invalid hadamard product broadcast: {}x{} vs {}x{}",
self.rows, self.cols, other.rows, other.cols
);
} }
let mut result = unsafe { Tensor::uninitialized(self.rows, self.cols, self.dtype) }; let mut result = unsafe { Tensor::uninitialized(self.rows, self.cols, self.dtype) };
for row in 0..self.rows { for row in 0..self.rows {
@ -1036,7 +1073,10 @@ impl Tensor {
pub fn view(&self, rows: i64, cols: i64) -> Tensor { pub fn view(&self, rows: i64, cols: i64) -> Tensor {
if rows * cols != self.rows * self.cols { if rows * cols != self.rows * self.cols {
panic!("Invalid tensor view"); panic!(
"Invalid tensor view, requested {}x{} but tensor is {}x{}",
rows, cols, self.rows, self.cols
);
} }
if rows == self.rows { if rows == self.rows {
return self.clone(); return self.clone();
@ -1139,6 +1179,16 @@ impl Tensor {
} }
} }
/// Direction in which tensor pieces loaded from multiple files are joined into one tensor.
///
/// E.g. two pieces of size 32x4 and 32x4 become
///   - 64x4 when joined with `Rows`,
///   - 32x8 when joined with `Cols`.
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Debug)]
pub enum FromPiecesDirection {
/// Pieces are stacked on top of each other: row counts add up, column counts must agree.
Rows,
/// Pieces are placed side by side: column counts add up, row counts must agree.
Cols,
}
impl TensorBuilder { impl TensorBuilder {
pub fn load<P: AsRef<Path>>(&self, data_dir: P) -> Result<Tensor, TensorError> { pub fn load<P: AsRef<Path>>(&self, data_dir: P) -> Result<Tensor, TensorError> {
let data_dir: &Path = data_dir.as_ref(); let data_dir: &Path = data_dir.as_ref();
@ -1146,21 +1196,194 @@ impl TensorBuilder {
return Err(TensorError::InvalidStride(self.stride)); return Err(TensorError::InvalidStride(self.stride));
} }
let tensor = unsafe { Tensor::uninitialized(self.rows, self.cols, self.dtype) }; let tensor = unsafe { Tensor::uninitialized(self.rows, self.cols, self.dtype) };
assert_eq!(self.dtype, TensorDType::Float16); let path = data_dir
let path = data_dir.join(&self.src_path); .join(format!("consolidated.{:02}", 0))
.join("data")
.join(&self.src_path);
let mut f = std::fs::File::open(&path).unwrap(); let mut f = std::fs::File::open(&path).unwrap();
f.seek(std::io::SeekFrom::Start(
(self.offset as u64) * self.dtype.bytes_per_item() as u64,
))?;
let mut cursor: usize = 0; let mut cursor: usize = 0;
let mut buf: Vec<u8> = vec![0; self.cols as usize * 2]; let mut buf: Vec<u8> = vec![0; self.cols as usize * self.dtype.bytes_per_item()];
for _row in 0..self.rows { for _row in 0..self.rows {
f.read_exact(&mut buf)?; f.read_exact(&mut buf)?;
unsafe { unsafe {
std::ptr::copy_nonoverlapping(buf.as_ptr(), tensor.data.add(cursor), buf.len()); std::ptr::copy_nonoverlapping(buf.as_ptr(), tensor.data.add(cursor), buf.len());
} }
cursor += tensor.capacity_cols as usize * 2; cursor += tensor.capacity_cols as usize * self.dtype.bytes_per_item();
} }
Ok(tensor.to_f32()) Ok(tensor.to_f32())
} }
/// Loads a tensor from multiple TensorBuilders; used to load a tensor from multiple files
/// which is what the larger LLaMA models do.
pub fn load_from_pieces<P: AsRef<Path>>(
builders: &[Self],
data_dir: P,
direction: FromPiecesDirection,
) -> Result<Tensor, TensorError> {
let data_dir: &Path = data_dir.as_ref();
if builders.is_empty() {
return Err(TensorError::TensorBuilderEmpty);
}
fn load_from_pieces_cols(
builders: &[TensorBuilder],
data_dir: &Path,
) -> Result<Tensor, TensorError> {
let mut total_cols: i64 = 0;
let expected_rows: i64 = builders[0].rows;
let expected_dtype: TensorDType = builders[0].dtype;
// Do some checking before we attempt loading.
for builder in builders.iter() {
total_cols += builder.cols;
if builder.stride < 1 {
return Err(TensorError::InvalidStride(builder.stride));
}
if builder.rows != expected_rows {
return Err(TensorError::TensorBuilderRowsMismatch(
builder.rows,
expected_rows,
));
}
if builder.dtype != expected_dtype {
return Err(TensorError::TensorBuilderDTypeMismatch(
builder.dtype,
expected_dtype,
));
}
}
let tensor =
unsafe { Tensor::uninitialized(expected_rows, total_cols, builders[0].dtype) };
let mut buf: Vec<u8> = vec![];
let mut col_offset = 0;
for (idx, builder) in builders.iter().enumerate() {
let path = data_dir
.join(format!("consolidated.{:02}", idx))
.join("data")
.join(&builder.src_path);
buf.truncate(0);
buf.resize(builder.cols as usize * builder.dtype.bytes_per_item(), 0);
let mut f = std::fs::File::open(&path).unwrap();
f.seek(std::io::SeekFrom::Start(
(builder.offset as u64) * builder.dtype.bytes_per_item() as u64,
))?;
for row in 0..builder.rows {
match f.read_exact(&mut buf) {
Ok(_) => {}
Err(err) => {
return Err(TensorError::TensorBuilderReadError(
err,
format!(
"path={:?} row={} expected_len={} offset={}",
path,
row,
buf.len(),
builder.offset
),
));
}
};
unsafe {
std::ptr::copy_nonoverlapping(
buf.as_ptr(),
tensor.data.add(
((row * tensor.capacity_cols + col_offset) as usize)
* builder.dtype.bytes_per_item(),
),
buf.len(),
);
}
}
col_offset += builder.cols;
}
Ok(tensor.to_f32())
}
fn load_from_pieces_rows(
builders: &[TensorBuilder],
data_dir: &Path,
) -> Result<Tensor, TensorError> {
let mut total_rows: i64 = 0;
let expected_cols: i64 = builders[0].cols;
let expected_dtype: TensorDType = builders[0].dtype;
// Do some checking before we attempt loading.
for builder in builders.iter() {
total_rows += builder.rows;
if builder.stride < 1 {
return Err(TensorError::InvalidStride(builder.stride));
}
if builder.cols != expected_cols {
return Err(TensorError::TensorBuilderRowsMismatch(
builder.cols,
expected_cols,
));
}
if builder.dtype != expected_dtype {
return Err(TensorError::TensorBuilderDTypeMismatch(
builder.dtype,
expected_dtype,
));
}
}
let tensor =
unsafe { Tensor::uninitialized(total_rows, expected_cols, builders[0].dtype) };
let mut buf: Vec<u8> = vec![];
let mut row_offset: i64 = 0;
for (idx, builder) in builders.iter().enumerate() {
let path = data_dir
.join(format!("consolidated.{:02}", idx))
.join("data")
.join(&builder.src_path);
buf.truncate(0);
buf.resize(builder.cols as usize * builder.dtype.bytes_per_item(), 0);
let mut f = std::fs::File::open(&path).unwrap();
f.seek(std::io::SeekFrom::Start(
(builder.offset as u64) * builder.dtype.bytes_per_item() as u64,
))?;
for row in 0..builder.rows {
match f.read_exact(&mut buf) {
Ok(_) => {}
Err(err) => {
return Err(TensorError::TensorBuilderReadError(
err,
format!(
"path={:?} row={} expected_len={} offset={}",
path,
row,
buf.len(),
builder.offset
),
));
}
};
unsafe {
std::ptr::copy_nonoverlapping(
buf.as_ptr(),
tensor.data.add(
(((row + row_offset) * tensor.capacity_cols) as usize)
* builder.dtype.bytes_per_item(),
),
buf.len(),
);
}
}
row_offset += builder.rows;
}
Ok(tensor.to_f32())
}
match direction {
FromPiecesDirection::Rows => load_from_pieces_rows(builders, data_dir),
FromPiecesDirection::Cols => load_from_pieces_cols(builders, data_dir),
}
}
} }
#[cfg(test)] #[cfg(test)]

@ -1,5 +1,5 @@
use crate::embedding::Embedding; use crate::embedding::Embedding;
use crate::tensor::{Tensor, TensorDType}; use crate::tensor::{FromPiecesDirection, Tensor, TensorDType};
use crate::tokenizer::TokenId; use crate::tokenizer::TokenId;
use crate::unpickler; use crate::unpickler;
use crate::unpickler::UnpicklingError; use crate::unpickler::UnpicklingError;
@ -114,7 +114,7 @@ pub struct FeedForward {
impl Transformer { impl Transformer {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
emb: Embedding, emb: Embedding,
dim: usize, dim: usize,
n_layers: usize, n_layers: usize,
@ -150,7 +150,13 @@ impl Transformer {
std::mem::drop(progress_bar); std::mem::drop(progress_bar);
let norm = RMSNorm::from_unpickled(unpickled, "norm.weight".to_string(), eps, data_dir)?; let norm = RMSNorm::from_unpickled(unpickled, "norm.weight".to_string(), eps, data_dir)?;
let output = Tensor::from_unpickled(unpickled, "output.weight", data_dir)?.to_f32(); let output = Tensor::from_unpickled_pieces(
unpickled,
"output.weight",
data_dir,
FromPiecesDirection::Rows,
)?
.to_f32();
Ok(Transformer { Ok(Transformer {
freqs_cis: compute_freqs_cis(dim / n_heads, max_seq_len, 10000.0), freqs_cis: compute_freqs_cis(dim / n_heads, max_seq_len, 10000.0),
@ -227,7 +233,7 @@ impl Transformer {
impl TransformerBlock { impl TransformerBlock {
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
layer_id: usize, layer_id: usize,
eps: f64, eps: f64,
n_local_heads: usize, n_local_heads: usize,
@ -279,13 +285,19 @@ impl TransformerBlock {
impl RMSNorm { impl RMSNorm {
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
name: String, name: String,
eps: f64, eps: f64,
data_dir: P, data_dir: P,
) -> Result<RMSNorm, UnpicklingError> { ) -> Result<RMSNorm, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref(); let data_dir: &Path = data_dir.as_ref();
let weights = Tensor::from_unpickled(unpickled, name, data_dir)?.to_f32(); let weights = Tensor::from_unpickled_pieces(
&unpickled[0..=0],
name.clone(),
data_dir,
FromPiecesDirection::Rows,
)?
.to_f32();
Ok(Self { Ok(Self {
eps, eps,
weight: weights, weight: weights,
@ -301,28 +313,31 @@ impl RMSNorm {
impl FeedForward { impl FeedForward {
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
layer_id: usize, layer_id: usize,
data_dir: P, data_dir: P,
) -> Result<FeedForward, UnpicklingError> { ) -> Result<FeedForward, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref(); let data_dir: &Path = data_dir.as_ref();
let w1 = Tensor::from_unpickled( let w1 = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.feed_forward.w1.weight", layer_id), format!("layers.{}.feed_forward.w1.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Rows,
)? )?
.to_f32(); .to_f32();
let w2 = Tensor::from_unpickled( let w2 = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.feed_forward.w2.weight", layer_id), format!("layers.{}.feed_forward.w2.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Cols,
)? )?
.to_f32(); .to_f32();
let w3 = Tensor::from_unpickled( let w3 = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.feed_forward.w3.weight", layer_id), format!("layers.{}.feed_forward.w3.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Rows,
)? )?
.to_f32(); .to_f32();
@ -348,7 +363,7 @@ impl FeedForward {
impl Attention { impl Attention {
pub fn from_unpickled<P: AsRef<Path>>( pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value, unpickled: &[unpickler::Value],
layer_id: usize, layer_id: usize,
n_local_heads: usize, n_local_heads: usize,
head_dim: usize, head_dim: usize,
@ -356,28 +371,32 @@ impl Attention {
) -> Result<Attention, UnpicklingError> { ) -> Result<Attention, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref(); let data_dir: &Path = data_dir.as_ref();
let wq = Tensor::from_unpickled( let wq = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.attention.wq.weight", layer_id), format!("layers.{}.attention.wq.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Rows,
)? )?
.to_f32(); .to_f32();
let wk = Tensor::from_unpickled( let wk = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.attention.wk.weight", layer_id), format!("layers.{}.attention.wk.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Rows,
)? )?
.to_f32(); .to_f32();
let wv = Tensor::from_unpickled( let wv = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.attention.wv.weight", layer_id), format!("layers.{}.attention.wv.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Rows,
)? )?
.to_f32(); .to_f32();
let wo = Tensor::from_unpickled( let wo = Tensor::from_unpickled_pieces(
unpickled, unpickled,
format!("layers.{}.attention.wo.weight", layer_id), format!("layers.{}.attention.wo.weight", layer_id),
data_dir, data_dir,
FromPiecesDirection::Cols,
)? )?
.to_f32(); .to_f32();
@ -494,7 +513,7 @@ impl Attention {
concat_vec.push(output.row(idx)); concat_vec.push(output.row(idx));
} }
let concat_vec2: Vec<&Tensor> = concat_vec.iter().collect(); let concat_vec2: Vec<&Tensor> = concat_vec.iter().collect();
let xq_row = Tensor::concat(&concat_vec2).view(1, 4096); let xq_row = Tensor::concat(&concat_vec2).view(1, self.wo.rows());
xq_row.matrix_mul_transposed(&self.wo) xq_row.matrix_mul_transposed(&self.wo)
}) })
.collect(); .collect();

@ -108,14 +108,12 @@ impl Value {
fn to_tensor_builder2(&self, args: &[Value]) -> Option<TensorBuilder> { fn to_tensor_builder2(&self, args: &[Value]) -> Option<TensorBuilder> {
if args.len() == 6 { if args.len() == 6 {
Self::to_tensor_builder2_6items(args) Self::to_tensor_builder2_6items(args)
} else if args.len() == 4 {
Self::to_tensor_builder2_4items(args)
} else { } else {
None None
} }
} }
fn to_tensor_builder2_4items(args: &[Value]) -> Option<TensorBuilder> { fn to_tensor_builder2_6items(args: &[Value]) -> Option<TensorBuilder> {
let storagev: &Value = args[0].get_persistent_id()?; let storagev: &Value = args[0].get_persistent_id()?;
let storage_args: &[Value] = storagev.get_tuple()?; let storage_args: &[Value] = storagev.get_tuple()?;
let storage_mark: &str = storage_args[0].get_str()?; let storage_mark: &str = storage_args[0].get_str()?;
@ -129,77 +127,52 @@ impl Value {
} }
let dtype: TensorDType = match storage_type { let dtype: TensorDType = match storage_type {
"HalfStorage" => TensorDType::Float16, "HalfStorage" => TensorDType::Float16,
_ => return None, _ => {
}; println!("1");
let storage_filename: &str = storage_args[2].get_str()?;
let nitems: i64 = storage_args[4].get_int64()?;
let offset: i64 = args[1].get_int64()?;
if offset != 0 {
return None;
}
let rows: i64 = 1;
let cols: i64 = nitems;
let row_stride: i64 = cols;
if row_stride != cols {
return None;
}
Some(TensorBuilder {
src_path: PathBuf::from(storage_filename),
dtype,
stride: row_stride,
rows,
cols,
nitems,
})
}
fn to_tensor_builder2_6items(args: &[Value]) -> Option<TensorBuilder> {
let storagev: &Value = args[0].get_persistent_id()?;
let storage_args: &[Value] = storagev.get_tuple()?;
let storage_mark: &str = storage_args[0].get_str()?;
if storage_mark != "storage" {
return None;
}
let (storage_module, storage_type) = storage_args[1].get_global()?;
if storage_module != "torch" {
return None; return None;
} }
let dtype: TensorDType = match storage_type {
"HalfStorage" => TensorDType::Float16,
_ => return None,
}; };
let storage_filename: &str = storage_args[2].get_str()?; let storage_filename: &str = storage_args[2].get_str()?;
let nitems: i64 = storage_args[4].get_int64()?; let nitems: i64 = storage_args[4].get_int64()?;
let offset: i64 = args[1].get_int64()?; let offset: i64 = args[1].get_int64()?;
if offset != 0 {
return None;
}
let shape: &[Value] = args[2].get_tuple()?; let shape: &[Value] = args[2].get_tuple()?;
let stride: &[Value] = args[3].get_tuple()?; let stride: &[Value] = args[3].get_tuple()?;
if shape.len() != 2 { if shape.len() != 2 && shape.len() != 1 {
println!("2");
return None; return None;
} }
if stride.len() != 2 { if stride.len() != 2 && stride.len() != 1 {
println!("3");
return None; return None;
} }
let rows: i64 = shape[0].get_int64()?; let (rows, cols) = if shape.len() == 2 {
let cols: i64 = shape[1].get_int64()?; (shape[0].get_int64()?, shape[1].get_int64()?)
} else {
let cols = shape[0].get_int64()?;
(1, cols)
};
let row_stride: i64 = stride[0].get_int64()?; let (row_stride, col_stride) = if stride.len() == 1 {
let col_stride: i64 = stride[1].get_int64()?; let (r, c) = (stride[0].get_int64()?, 1);
if r != 1 {
println!("4");
return None;
}
(r, c)
} else {
(stride[0].get_int64()?, stride[1].get_int64()?)
};
if col_stride != 1 { if col_stride != 1 {
println!("5");
return None; return None;
} }
if row_stride != cols { if row_stride != cols && stride.len() == 2 {
println!("6");
return None; return None;
} }
@ -210,6 +183,7 @@ impl Value {
rows, rows,
cols, cols,
nitems, nitems,
offset,
}) })
/* Args should look like this (took random example from debug print) : /* Args should look like this (took random example from debug print) :
@ -529,6 +503,7 @@ pub fn unpickle(bytes: &[u8]) -> Result<Value, UnpicklingError> {
)); ));
} }
tuple.push(stack.pop().unwrap()); tuple.push(stack.pop().unwrap());
stack.push(Value::Tuple(tuple));
bytes = &bytes[1..]; bytes = &bytes[1..];
continue; continue;
} }
@ -604,6 +579,27 @@ pub fn unpickle(bytes: &[u8]) -> Result<Value, UnpicklingError> {
bytes = &bytes[1..]; bytes = &bytes[1..];
continue; continue;
} }
if frame_opcode == 106 {
// long_binget
if bytes.len() < 5 {
return Err(UnpicklingError::UnpicklingError(
"Unexpected end of data while handling LONG_BINGET".to_string(),
));
}
let idx = u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]]);
match memo.get(&(idx as u32)) {
None => {
return Err(UnpicklingError::UnpicklingError(
"LONG_BINGET index out of range".to_string(),
));
}
Some(memo_value) => {
stack.push(memo_value.clone());
}
}
bytes = &bytes[5..];
continue;
}
if frame_opcode == 46 { if frame_opcode == 46 {
// stop // stop
// bytes = &bytes[1..]; // bytes = &bytes[1..];

Loading…
Cancel
Save