Add readme, make clippy happy.

broken-opencl-code
Mikko Juola 3 years ago
parent 3b8f904f13
commit f6217e0036

@@ -1,21 +1,41 @@
# AdeonLLaMA
This is my attempt at making the LLaMA language model work on a pure Rust
CPU implementation.
CPU implementation. I was inspired by an amazing CPU implementation here:
https://github.com/ggerganov/ggml that could run GPT-J 6B models.
As of writing of this, it can run LLaMA-7B at around ~1 token per second, using
something like 1.5 threads because I haven't yet properly figured out how to
multithread this.
As of writing this, it can run LLaMA-7B at around 1 token per second,
using something like 1.5 threads, because I haven't yet properly figured out
how to multithread it.
It uses AVX2 intrinsics to speed up itself.
It uses AVX2 intrinsics to speed itself up. Therefore, you need an x86-family
CPU to run this.
It has a Python unpickler that understands the `.pth` files used by PyTorch.
Well, sort of: it doesn't unzip them automatically (see below).
# How to run
You will need the LLaMA-7B weights first. Refer to https://github.com/facebookresearch/llama/
Once you have 7B weights, and the `tokenizer.model` it comes with, you can make
it generate tokens:
Once you have the 7B weights and the `tokenizer.model` that comes with them,
you need to decompress the checkpoint (the `.pth` file is a ZIP archive):
```shell
$ cd LLaMA
$ cd 7B
$ unzip consolidated.00.pth
```
You should then be ready to generate some text.
```shell
cargo run --release -- --tokenizer-model /path/to/tokenizer.model --model-path /path/to/LLaMA/7B
cargo run --release -- --tokenizer-model /path/to/tokenizer.model --model-path /path/to/LLaMA/7B/consolidated/data.pkl --prompt "The meaning of life is"
```
Right now it seems to use around 25 gigabytes of memory. Internally, all
weights are cast to 32-bit floats.
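That figure lines up with the model size: LLaMA-7B has roughly 6.7 billion
parameters, and 6.7e9 parameters × 4 bytes per 32-bit float is about 27 GB
before any overhead.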
# Future plans
This is a hobby project for me, so don't expect updates or help.

@@ -40,8 +40,8 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
// We chop off file name from model_path and append "data/"
let model_data_dir = model_path
.split("/")
.take(model_path.split("/").count() - 1)
.split('/')
.take(model_path.split('/').count() - 1)
.collect::<Vec<&str>>()
.join("/")
+ "/data/";
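A note on the split-and-join above: the same directory chop can also be
expressed with `std::path`. A minimal sketch, assuming `model_path` is a plain
file path (the function name is mine, not from the repo):

```rust
use std::path::Path;

// Take the parent directory of the model file and append "data/".
// Mirrors the split('/')-based chop above for ordinary paths.
fn model_data_dir(model_path: &str) -> String {
    let parent = Path::new(model_path).parent().unwrap_or_else(|| Path::new(""));
    format!("{}/data/", parent.display())
}
```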
@@ -95,7 +95,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
continue;
}
let tok = tok.id_to_str(*tok_id);
tok_str = tok_str + tok.replace("▁", " ").as_str();
tok_str += tok.replace('▁', " ").as_str();
}
println!("{}", tok_str);
}

@@ -73,7 +73,7 @@ impl Clone for Tensor {
impl Drop for Tensor {
fn drop(&mut self) {
unsafe {
if self.data != std::ptr::null_mut() {
if !self.data.is_null() {
std::alloc::dealloc(self.data, self.layout);
}
}
@@ -95,7 +95,7 @@ fn horizontal_sum(mut ymm: __m256) -> f32 {
ymm = _mm256_add_ps(ymm, ymm2);
ymm = _mm256_hadd_ps(ymm, ymm);
ymm = _mm256_hadd_ps(ymm, ymm);
return _mm256_cvtss_f32(ymm);
_mm256_cvtss_f32(ymm)
}
}
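For readers unfamiliar with the intrinsics: the whole routine reduces the
eight `f32` lanes of a `__m256` to a single scalar, with the two `hadd` passes
folding pairs of lanes. A scalar sketch of the same reduction (the name is
mine):

```rust
// Scalar equivalent of horizontal_sum: add up the 8 lanes of a __m256.
fn horizontal_sum_scalar(lanes: [f32; 8]) -> f32 {
    lanes.iter().sum()
}
```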
@@ -190,6 +190,7 @@ impl Tensor {
}
}
#[allow(clippy::missing_safety_doc)]
pub unsafe fn uninitialized(rows: i64, cols: i64, dtype: TensorDType) -> Self {
if rows == 0 || cols == 0 {
let mut tensor = Self::empty();
@@ -203,7 +204,7 @@ impl Tensor {
let layout =
Layout::from_size_align((nitems as usize) * dtype.bytes_per_item(), 32).unwrap();
let data = unsafe { std::alloc::alloc(layout) };
if data == std::ptr::null_mut() {
if data.is_null() {
panic!("Failed to allocate tensor");
}
// Even though we are uninitialized, we should zero out the extra space between the
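The 32 passed to `from_size_align` presumably matches the 32-byte width of a
`__m256` register (8 × f32), so aligned AVX2 loads and stores on the buffer
are valid; that is my reading, not stated in the commit.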
@@ -443,7 +444,7 @@ impl Tensor {
}
pub fn concat(pieces: &[&Tensor]) -> Tensor {
if pieces.len() == 0 {
if pieces.is_empty() {
return Tensor::empty();
}
let mut total_rows: i64 = 0;
@@ -745,7 +746,7 @@ impl Tensor {
unsafe {
for row in 0..self_rows {
let row = row as usize;
let row = row;
for col in 0..self_cols {
let mut target8: __m256 = _mm256_setzero_ps();
for p in 0..src_cols_its {
@@ -819,7 +820,7 @@ impl Tensor {
for row in 0..self.rows {
let mut sum8: __m256 = _mm256_setzero_ps();
for col in 0..col_its {
let col = (col * 8) as usize;
let col = col * 8;
let left_side8 =
_mm256_loadu_ps(self_data.add((row * self.capacity_cols) as usize + col));
let right_side8 = _mm256_loadu_ps(other_data.add(col));
@@ -885,7 +886,7 @@ impl Tensor {
let layout =
Layout::from_size_align((nitems as usize) * dtype.bytes_per_item(), 32).unwrap();
let data = unsafe { std::alloc::alloc_zeroed(layout) };
if data == std::ptr::null_mut() {
if data.is_null() {
panic!("Failed to allocate tensor");
}
Self {
@@ -1044,7 +1045,7 @@ impl TensorBuilder {
unsafe {
std::ptr::copy_nonoverlapping(buf.as_ptr(), tensor.data.add(cursor), buf.len());
}
cursor = cursor + (tensor.capacity_cols as usize * 2);
cursor += tensor.capacity_cols as usize * 2;
}
Ok(tensor.to_f32())
}
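The `* 2` here is, as far as I can tell, 2 bytes per item for the f16 source
data that `to_f32()` then widens; that reading is mine, not stated in the
commit.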

@@ -8,6 +8,12 @@ pub struct TokenSampler {
top_k: usize,
}
impl Default for TokenSampler {
fn default() -> Self {
Self::new()
}
}
impl TokenSampler {
pub fn new() -> Self {
Self {
@@ -58,7 +64,7 @@ impl TokenSampler {
logitsf.push((i as TokenId, logits.get_f32(0, i)));
}
logitsf.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
logitsf.truncate(self.top_k as usize);
logitsf.truncate(self.top_k);
let mut p_accum: f32 = 0.0;
for (idx, v) in logitsf.iter().enumerate() {
p_accum += v.1;
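The loop above is top-k sampling: sort by probability, keep the k most likely
tokens, then walk the list accumulating probability mass until a random draw
is covered. A self-contained sketch under those assumptions (the names and the
explicit renormalization are mine):

```rust
// Top-k sampling sketch: keep the k most likely tokens, renormalize,
// and pick the first token whose cumulative probability covers `coin`
// (a uniform random number in [0, 1)).
fn sample_top_k(mut probs: Vec<(usize, f32)>, k: usize, coin: f32) -> usize {
    probs.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    probs.truncate(k);
    let total: f32 = probs.iter().map(|(_, p)| p).sum();
    let mut acc = 0.0;
    for (id, p) in &probs {
        acc += p / total;
        if coin < acc {
            return *id;
        }
    }
    probs.last().unwrap().0 // fall back to the least likely kept token
}
```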

@@ -142,7 +142,7 @@ impl Tokenizer {
pub fn tokenize_to_ids<S: AsRef<str>>(&self, s: S) -> Vec<TokenId> {
let mut s: String = format!("▁{}", s.as_ref());
// Replace all space characters with a special token.
s = s.replace(" ", "▁");
s = s.replace(' ', "▁");
let pieces = self.tokenize_to_pieces(s);
let mut result = Vec::new();
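For context, this is SentencePiece-style whitespace handling: spaces are
rewritten to U+2581 ("▁", lower one eighth block) so word boundaries survive
tokenization into pieces. A minimal sketch of the pre-tokenization step (the
function name is mine):

```rust
// Prefix with ▁ and turn every space into ▁, SentencePiece-style.
fn pretokenize(s: &str) -> String {
    format!("▁{}", s).replace(' ', "▁")
}
```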

@@ -85,6 +85,7 @@ pub struct FeedForward {
}
impl Transformer {
#[allow(clippy::too_many_arguments)]
pub fn from_unpickled<P: AsRef<Path>>(
unpickled: &unpickler::Value,
emb: Embedding,
@@ -117,9 +118,8 @@ impl Transformer {
.collect::<Result<Vec<TransformerBlock>, UnpicklingError>>()?;
std::mem::drop(progress_bar);
let norm = RMSNorm::from_unpickled(unpickled, format!("norm.weight"), eps, data_dir)?;
let output =
Tensor::from_unpickled(unpickled, format!("output.weight"), data_dir)?.to_f32();
let norm = RMSNorm::from_unpickled(unpickled, "norm.weight".to_string(), eps, data_dir)?;
let output = Tensor::from_unpickled(unpickled, "output.weight", data_dir)?.to_f32();
Ok(Transformer {
freqs_cis: compute_freqs_cis(dim / n_heads, max_seq_len * 2, 10000.0),
@@ -189,8 +189,8 @@ impl Transformer {
}
let out = self.norm.forward(&emb_tensor);
let out = out.row(out.rows() - 1);
let prediction = self.output.matrix_mul_transposed(&out);
return prediction;
self.output.matrix_mul_transposed(&out)
}
}
@@ -242,8 +242,8 @@ impl TransformerBlock {
let h = x.add(&att_out);
let att_out = self.ffn_norm.forward(&h);
let att_out = self.feed_forward.forward(&att_out.transpose()).transpose();
let att_out = h.add(&att_out);
return att_out;
h.add(&att_out)
}
}
@@ -255,7 +255,7 @@ impl RMSNorm {
data_dir: P,
) -> Result<RMSNorm, UnpicklingError> {
let data_dir: &Path = data_dir.as_ref();
let weights = Tensor::from_unpickled(unpickled, &name, data_dir)?.to_f32();
let weights = Tensor::from_unpickled(unpickled, name, data_dir)?.to_f32();
Ok(Self {
eps,
weight: weights,
@@ -265,7 +265,7 @@ impl RMSNorm {
fn forward(&self, x: &Tensor) -> Tensor {
let inner = x.pow(2.0).mean_cols().add_scalar(self.eps as f32);
let out1 = x.scalar_multiply_broadcast(&inner.rsqrt());
return out1.hadamard_product_broadcast(&self.weight);
out1.hadamard_product_broadcast(&self.weight)
}
}
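Written out, the forward pass above is RMS normalization: each element of `x`
is scaled by the reciprocal root-mean-square of the row (plus `eps`), then
multiplied elementwise by the learned weight. A scalar sketch of the same
computation (the name is mine):

```rust
// y[i] = x[i] / sqrt(mean(x^2) + eps) * weight[i]
fn rms_norm(x: &[f32], weight: &[f32], eps: f32) -> Vec<f32> {
    let mean_sq = x.iter().map(|v| v * v).sum::<f32>() / x.len() as f32;
    let scale = 1.0 / (mean_sq + eps).sqrt();
    x.iter().zip(weight).map(|(v, w)| v * scale * w).collect()
}
```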
@@ -307,8 +307,8 @@ impl FeedForward {
);
let w1_out = w1_out.silu();
let w1w3_out = w1_out.hadamard_product(&w3_out).transpose();
let out = self.w2.matrix_mul_transposed(&w1w3_out);
return out;
self.w2.matrix_mul_transposed(&w1w3_out)
}
}
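The feed-forward above is the SwiGLU variant used by LLaMA:
`w2(silu(w1·x) ⊙ (w3·x))`, where ⊙ is the elementwise (Hadamard) product. The
gate nonlinearity, per element:

```rust
// silu(z) = z * sigmoid(z), the gate used in SwiGLU.
fn silu(z: f32) -> f32 {
    z / (1.0 + (-z).exp())
}
```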
@@ -417,8 +417,8 @@ impl Attention {
let concat_vec2: Vec<&Tensor> = concat_vec.iter().collect();
let xv_row = Tensor::concat(&concat_vec2);
let mut cache_k = attention_cache.cache_k[idx as usize].write().unwrap();
let mut cache_v = attention_cache.cache_v[idx as usize].write().unwrap();
let mut cache_k = attention_cache.cache_k[idx].write().unwrap();
let mut cache_v = attention_cache.cache_v[idx].write().unwrap();
/*
let m = xq_row
@@ -442,21 +442,21 @@ impl Attention {
cache_v.set_f32(dim as i64, pos as i64, v);
}
}
let keys = cache_k.clip_cols((start_pos + seq_len as usize) as usize);
let values = cache_v.clip_cols((start_pos + seq_len as usize) as usize);
let keys = cache_k.clip_cols(start_pos + seq_len as usize);
let values = cache_v.clip_cols(start_pos + seq_len as usize);
let m = xq_row
.matrix_mul(&keys)
.scalar_multiply_f32(1.0 / (self.head_dim as f32).sqrt());
let m2 = match mask {
match mask {
Some(ref mask) => m
.add(mask)
.to_f32()
.softmax()
.matrix_mul_transposed(&values),
None => m.softmax().matrix_mul_transposed(&values),
};
m2
}
})
.collect();
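This is standard scaled dot-product attention, `softmax(QKᵀ/√d + mask)·V`,
computed per head against the growing key/value cache. The scaling for one
query/key pair, as a sketch (the name is mine):

```rust
// Scaled dot-product score: q·k / sqrt(head_dim), matching the
// 1.0 / (head_dim as f32).sqrt() factor above.
fn attn_score(q: &[f32], k: &[f32]) -> f32 {
    let dot: f32 = q.iter().zip(k).map(|(a, b)| a * b).sum();
    dot / (q.len() as f32).sqrt()
}
```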
@@ -466,18 +466,18 @@ impl Attention {
.into_par_iter()
.map(|idx| {
let mut concat_vec: Vec<Tensor> = vec![];
for idx2 in 0..self.n_local_heads {
concat_vec.push(output[idx2 as usize].row(idx as i64));
for output in &output {
concat_vec.push(output.row(idx));
}
let concat_vec2: Vec<&Tensor> = concat_vec.iter().collect();
let xq_row = Tensor::concat(&concat_vec2).view(1, 4096);
let xq_row = xq_row.matrix_mul_transposed(&self.wo);
xq_row
xq_row.matrix_mul_transposed(&self.wo)
})
.collect();
let output3: Vec<&Tensor> = output2.iter().collect();
let output2: Tensor = Tensor::concat(&output3);
return output2;
output2
}
}
@@ -513,7 +513,7 @@ fn apply_rotary_emb(
xk_out.set_f32(row, col * 2 + 1, xk_imagpart);
}
}
return (xq_out, xk_out);
(xq_out, xk_out)
}
fn compute_freqs_cis(dim: usize, end: usize, theta: f64) -> FreqsCis {
@@ -526,8 +526,8 @@ fn compute_freqs_cis(dim: usize, end: usize, theta: f64) -> FreqsCis {
let mut result: Vec<Vec<f64>> = Vec::new();
for x in 0..end {
let mut row = Vec::new();
for y in 0..freqs.len() {
let freq = freqs[y] * (x as f64);
for freq in freqs.iter() {
let freq = freq * (x as f64);
row.push(freq);
}
result.push(row);
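For reference, the table being built here follows the usual rotary-embedding
recipe: base frequencies `freqs[i] = theta^(-2i/dim)` for `i` in `0..dim/2`,
with each position `x` getting angles `x * freqs[i]`. A sketch of the base
frequencies under that assumption (the name is mine):

```rust
// Rotary embedding base frequencies: theta^(-2i/dim) for i in 0..dim/2.
fn rope_freqs(dim: usize, theta: f64) -> Vec<f64> {
    (0..dim / 2)
        .map(|i| theta.powf(-(2.0 * i as f64) / dim as f64))
        .collect()
}
```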

@@ -72,14 +72,14 @@ impl Value {
pub fn get_persistent_id(&self) -> Option<&Value> {
match self {
Value::PersistentId(v) => Some(&v),
Value::PersistentId(v) => Some(v),
_ => None,
}
}
pub fn get_tuple(&self) -> Option<&[Value]> {
match self {
Value::Tuple(v) => Some(&v),
Value::Tuple(v) => Some(v),
_ => None,
}
}
@@ -92,7 +92,7 @@ impl Value {
Value::Global(ref module_name, ref attribute_name) => {
if module_name == "torch._utils" && attribute_name == "_rebuild_tensor_v2" {
match **args {
Value::Tuple(ref args) => self.to_tensor_builder2(&args),
Value::Tuple(ref args) => self.to_tensor_builder2(args),
_ => None,
}
} else {
@@ -146,14 +146,14 @@ impl Value {
return None;
}
return Some(TensorBuilder {
Some(TensorBuilder {
src_path: PathBuf::from(storage_filename),
dtype,
stride: row_stride,
rows,
cols,
nitems,
});
})
}
fn to_tensor_builder2_6items(args: &[Value]) -> Option<TensorBuilder> {
@@ -203,14 +203,14 @@ impl Value {
return None;
}
return Some(TensorBuilder {
Some(TensorBuilder {
src_path: PathBuf::from(storage_filename),
dtype,
stride: row_stride,
rows,
cols,
nitems,
});
})
/* Args should look like this (took random example from debug print) :
0 PERSISTENT_ID
@@ -545,7 +545,7 @@ pub fn unpickle(bytes: &[u8]) -> Result<Value, UnpicklingError> {
"Stack is empty while handling LONG_BINPUT".to_string(),
));
}
memo.insert(key as u32, stack.last().unwrap().clone());
memo.insert(key, stack.last().unwrap().clone());
bytes = &bytes[5..];
continue;
}
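For context on the `&bytes[5..]` skip: LONG_BINPUT is the pickle opcode `r`
followed by a 4-byte little-endian memo index, so the opcode plus its argument
is 5 bytes. Decoding the key, as a sketch (the name is mine):

```rust
// LONG_BINPUT: 1 opcode byte ('r') + u32 little-endian memo key = 5 bytes.
fn long_binput_key(bytes: &[u8]) -> u32 {
    u32::from_le_bytes([bytes[1], bytes[2], bytes[3], bytes[4]])
}
```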
