You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
rllama/src/benches/benchmark.rs

101 lines
3.4 KiB
Rust

extern crate rllama;
use rllama::tensor::{Tensor, TensorDType};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
pub fn tensor_benchmarks(c: &mut Criterion) {
let orig16_1 = Tensor::full(16, 32, TensorDType::Float16, 3.0);
let orig16_2 = Tensor::full(32, 512, TensorDType::Float16, -1.33);
let orig32_1 = Tensor::full(16, 32, TensorDType::Float32, 3.0);
let orig32_2 = Tensor::full(32, 512, TensorDType::Float32, -1.33);
let orig32_2_transposed = orig32_2.transpose();
let mut result_16 = Tensor::zeros(16, 512, TensorDType::Float16);
let mut result_32 = Tensor::zeros(16, 512, TensorDType::Float32);
let orig_84096_1 = Tensor::zeros(8, 4096, TensorDType::Float32);
let orig_84096_2 = Tensor::zeros(4096, 4096, TensorDType::Float32);
let mut result_84096 = Tensor::zeros(8, 4096, TensorDType::Float32);
let orig_f32 = Tensor::zeros(1024, 1024, TensorDType::Float32);
let orig_f16 = Tensor::zeros(1024, 1024, TensorDType::Float16);
c.bench_function("1024x1024 matrix from f32->f16", |b| {
b.iter(|| {
let _ = black_box(&orig_f32).to_f16();
})
});
c.bench_function("1024x1024 matrix from f16->f32", |b| {
b.iter(|| {
let _ = black_box(&orig_f16).to_f32();
})
});
c.bench_function(
"matrix multiplication 8x4096 @ 4096x4096 f32 in-place",
|b| {
b.iter(|| {
let _ = result_84096
.matrix_mul_inplace(black_box(&orig_84096_1), black_box(&orig_84096_2));
})
},
);
c.bench_function(
"matrix multiplication 8x4096 @ 4096x4096 f32 in-place, transposed",
|b| {
b.iter(|| {
let _ = result_84096.matrix_mul_inplace_transposed(
black_box(&orig_84096_1),
black_box(&orig_84096_2),
);
})
},
);
c.bench_function("matrix multiplication f32 not in-place", |b| {
b.iter(|| {
let _ = black_box(&orig32_1).matrix_mul(black_box(&orig32_2));
})
});
c.bench_function("matrix multiplication f32 naive", |b| {
b.iter(|| {
let _ = black_box(&orig32_1).matrix_mul_naive(black_box(&orig32_2));
})
});
c.bench_function("matrix multiplication f16 not in-place", |b| {
b.iter(|| {
let _ = black_box(&orig16_1).matrix_mul(black_box(&orig16_2));
})
});
c.bench_function("matrix multiplication f16 naive", |b| {
b.iter(|| {
let _ = black_box(&orig16_1).matrix_mul_naive(black_box(&orig16_2));
})
});
c.bench_function("matrix multiplication f16 in-place", |b| {
b.iter(|| {
let _ = result_16.matrix_mul_inplace(black_box(&orig16_1), black_box(&orig16_2));
})
});
c.bench_function("matrix multiplication f32 in-place", |b| {
b.iter(|| {
let _ = result_32.matrix_mul_inplace(black_box(&orig32_1), black_box(&orig32_2));
})
});
c.bench_function("matrix multiplication f32 in-place, transposed", |b| {
b.iter(|| {
let _ = result_32.matrix_mul_inplace_transposed(
black_box(&orig32_1),
black_box(&orig32_2_transposed),
);
})
});
}
criterion_group!(benches, tensor_benchmarks);
criterion_main!(benches);