coreylowman · emchristiansen · Jul 25, 2023 · Jul 25, 2023 · Jul 25, 2023 · Jul 25, 2023
diff --git a/.rustfmt.toml b/.rustfmt.toml
diff --git a/src/tensor/gradients.rs b/src/tensor/gradients.rs
@@ -171,6 +171,30 @@ impl<E, D: Storage<E>> Gradients<E, D> {
         let r_ref = unsafe { &*r_ptr };
         (l_refs, r_ref)
     }
+
+    /// Borrows the gradient buffers of every tensor in `ls` immutably and the
+    /// gradient buffer of `r` mutably, all at the same time.
+    ///
+    /// Note that, despite the name, the returned tuple is
+    /// `(shared references for ls, mutable reference for r)`.
+    ///
+    /// # Panics
+    /// Panics if any tensor in `ls` has the same id as `r`, or if two tensors
+    /// in `ls` share an id.
+    #[inline]
+    pub(crate) fn many_mut_and_ref<L: Shape, R: Shape>(
+        &mut self,
+        ls: &Vec<impl Tensorlike<L, E, D>>,
+        r: &impl Tensorlike<R, E, D>,
+    ) -> (Vec<&D::Vec>, &mut D::Vec) {
+        // Pairwise id check (quadratic in `ls.len()`): every reference handed
+        // out below must belong to a different tensor id.
+        for i in 0..ls.len() {
+            assert_ne!(ls[i].id(), r.id());
+            for j in (i + 1)..ls.len() {
+                assert_ne!(ls[i].id(), ls[j].id());
+            }
+        }
+        let l_refs: Vec<&D::Vec> = ls
+            .iter()
+            .map(|l| {
+                // The borrow returned by `get_ref` is laundered through a raw
+                // pointer so that `self` can be borrowed again afterwards.
+                // SAFETY (NOTE(review)): relies on the id assertions above;
+                // presumably distinct ids map to distinct buffers - confirm
+                // against `get_ref` / `get_mut`.
+                let l_ptr = self.get_ref(l) as *const D::Vec;
+                unsafe { &*l_ptr }
+            })
+            .collect();
+        // SAFETY (NOTE(review)): `r`'s id differs from every id in `ls` (asserted
+        // above), so this mutable reference is presumably disjoint from `l_refs`.
+        let r_ptr = self.get_mut(r) as *mut _;
+        let r_ref = unsafe { &mut *r_ptr };
+        (l_refs, r_ref)
+    }
 }
 
 /// Contains a [Gradients] and list of backward operations.

diff --git a/src/tensor_ops/mod.rs b/src/tensor_ops/mod.rs
@@ -207,6 +207,7 @@ mod sum_to;
 mod tanh;
 mod to_dtype;
 mod tri;
+mod unstack;
 mod upscale2d;
 mod var_to;
 
@@ -271,6 +272,7 @@ pub use sum_to::SumTo;
 pub use tanh::tanh;
 pub use to_dtype::to_dtype;
 pub use tri::{lower_tri, upper_tri};
+pub use unstack::TryUnstack;
 pub use upscale2d::{Bilinear, GenericUpscale2D, NearestNeighbor, TryUpscale2D, UpscaleMethod};
 pub use var_to::VarTo;
 

diff --git a/src/tensor_ops/unstack/cpu_kernel.rs b/src/tensor_ops/unstack/cpu_kernel.rs
@@ -0,0 +1,56 @@
+use crate::{
+    shapes::*,
+    tensor::{unique_id, Cpu, Tensor},
+};
+
+use std::vec::Vec;
+/// CPU implementation of the unstack kernel.
+impl<E: Dtype> super::UnstackKernel<E> for Cpu {
+    /// Copies each slice along the leading dimension of `inp` into its own
+    /// freshly allocated tensor with a new unique id and a default tape.
+    ///
+    /// Returns an empty `Vec` when the leading dimension is zero.
+    ///
+    /// # Errors
+    /// Propagates any allocation error from `try_alloc_elem`.
+    fn forward<S: Shape>(
+        &self,
+        inp: &Tensor<S, E, Self>,
+    ) -> Result<Vec<Tensor<S::Smaller, E, Self>>, Self::Err>
+    where
+        S: super::SubDim,
+    {
+        let num_items = inp.shape().concrete()[0];
+        if num_items == 0 {
+            // Nothing to split; this also avoids dividing by zero below.
+            return Ok(Vec::new());
+        }
+
+        let shape: S::Smaller = inp.shape().sub_dim();
+        // `shape.strides()` only provides a value of the right type; every
+        // entry is overwritten with the input's trailing strides.
+        let mut strides = shape.strides();
+        for i in 0..S::Smaller::NUM_DIMS {
+            strides[i] = inp.strides[i + 1];
+        }
+
+        // NOTE(review): the slicing below treats the buffer as `num_items`
+        // consecutive chunks of equal length. That presumably only holds when
+        // the leading dimension is the outermost one in memory (not permuted
+        // or broadcast) - confirm against callers.
+        let item_size = inp.data.len() / num_items;
+
+        let mut tensors = Vec::with_capacity(num_items);
+        for i in 0..num_items {
+            let mut data = self.try_alloc_elem(item_size, E::default())?;
+            data.copy_from_slice(&inp.data[i * item_size..(i + 1) * item_size]);
+
+            tensors.push(Tensor {
+                id: unique_id(),
+                data: std::sync::Arc::new(data),
+                shape,
+                strides,
+                device: self.clone(),
+                tape: Default::default(),
+            });
+        }
+        Ok(tensors)
+    }
+
+    /// Accumulates each output gradient in `grad_out` into the matching
+    /// chunk of `grad_inp` (chunk `i` starts at `i * item_size`).
+    ///
+    /// Does nothing when `grad_out` is empty.
+    fn backward(
+        &self,
+        grad_inp: &mut Self::Vec,
+        grad_out: Vec<&Self::Vec>,
+    ) -> Result<(), Self::Err> {
+        if grad_out.is_empty() {
+            // No outputs means no gradient to route back; this also avoids
+            // dividing by zero below.
+            return Ok(());
+        }
+        let item_size = grad_inp.len() / grad_out.len();
+
+        for (i, item) in grad_out.into_iter().enumerate() {
+            for (j, value) in item.iter().enumerate() {
+                grad_inp[i * item_size + j] += *value;
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/tensor_ops/unstack/mod.rs b/src/tensor_ops/unstack/mod.rs
@@ -0,0 +1,233 @@
+use crate::{shapes::*, tensor::*};
+
+use std::vec::Vec;
+
+mod cpu_kernel;
+
+/// Split a tensor along its first dimension into a `Vec` of tensors.
+///
+/// For tensors, each element of the result has the input's shape with the
+/// leading dimension removed.
+///
+/// **Pytorch equivalent** `torch.unbind`.
+///
+/// ```rust
+/// # use dfdx::prelude::*;
+/// # let dev: Cpu = Default::default();
+/// let tensor: Tensor<Rank3<2, 3, 4>, f32, _> = dev.zeros();
+/// let result: Vec<Tensor<Rank2<3, 4>, f32, _>> = tensor.unstack();
+/// ```
+pub trait TryUnstack: Sized {
+    /// The value produced by unstacking `Self`.
+    type Unstacked;
+    /// The error returned by [TryUnstack::try_unstack].
+    type Err: std::fmt::Debug;
+
+    /// Unstack a tensor along a dimension.
+    ///
+    /// # Panics
+    /// Panics if [TryUnstack::try_unstack] returns an error.
+    fn unstack(self) -> Self::Unstacked {
+        self.try_unstack().unwrap()
+    }
+    /// Fallible version of [TryUnstack::unstack]
+    fn try_unstack(self) -> Result<Self::Unstacked, Self::Err>;
+}
+
+/// Unstacking a tensor yields one tensor per entry of its leading dimension;
+/// the work is delegated to the module-level `try_unstack` function.
+impl<S, E, D, T> TryUnstack for Tensor<S, E, D, T>
+where
+    S: Shape + SubDim,
+    E: Dtype,
+    D: UnstackKernel<E>,
+    T: Tape<E, D>,
+{
+    type Unstacked = Vec<Tensor<S::Smaller, E, D, T>>;
+    type Err = D::Err;
+
+    fn try_unstack(self) -> Result<Self::Unstacked, Self::Err> {
+        // Resolves to the free function of the same name, not to this method.
+        try_unstack(self)
+    }
+}
+
+/// A shape from which one dimension can be removed.
+///
+/// The tuple implementations in this module all drop the first dimension.
+pub trait SubDim: Shape {
+    /// The shape that remains after the dimension is removed.
+    type Smaller: Shape;
+    /// Returns the remaining shape, built from this shape's other dimensions.
+    fn sub_dim(&self) -> Self::Smaller;
+}
+
+/// Rank 1: removing the only dimension leaves the unit shape.
+impl<D1> SubDim for (D1,)
+where
+    D1: Dim,
+{
+    type Smaller = ();
+    fn sub_dim(&self) -> Self::Smaller {}
+}
+
+/// Rank 2: drops the first dimension and keeps the second.
+impl<D1, D2> SubDim for (D1, D2)
+where
+    D1: Dim,
+    D2: Dim,
+{
+    type Smaller = (D2,);
+    fn sub_dim(&self) -> Self::Smaller {
+        let (_, d2) = self;
+        (*d2,)
+    }
+}
+
+/// Rank 3: drops the first dimension and keeps the remaining two.
+impl<D1, D2, D3> SubDim for (D1, D2, D3)
+where
+    D1: Dim,
+    D2: Dim,
+    D3: Dim,
+{
+    type Smaller = (D2, D3);
+    fn sub_dim(&self) -> Self::Smaller {
+        let (_, d2, d3) = self;
+        (*d2, *d3)
+    }
+}
+
+/// Rank 4: drops the first dimension and keeps the remaining three.
+impl<D1, D2, D3, D4> SubDim for (D1, D2, D3, D4)
+where
+    D1: Dim,
+    D2: Dim,
+    D3: Dim,
+    D4: Dim,
+{
+    type Smaller = (D2, D3, D4);
+    fn sub_dim(&self) -> Self::Smaller {
+        let (_, d2, d3, d4) = self;
+        (*d2, *d3, *d4)
+    }
+}
+
+/// Rank 5: drops the first dimension and keeps the remaining four.
+impl<D1, D2, D3, D4, D5> SubDim for (D1, D2, D3, D4, D5)
+where
+    D1: Dim,
+    D2: Dim,
+    D3: Dim,
+    D4: Dim,
+    D5: Dim,
+{
+    type Smaller = (D2, D3, D4, D5);
+    fn sub_dim(&self) -> Self::Smaller {
+        let (_, d2, d3, d4, d5) = self;
+        (*d2, *d3, *d4, *d5)
+    }
+}
+
+/// Rank 6: drops the first dimension and keeps the remaining five.
+impl<D1, D2, D3, D4, D5, D6> SubDim for (D1, D2, D3, D4, D5, D6)
+where
+    D1: Dim,
+    D2: Dim,
+    D3: Dim,
+    D4: Dim,
+    D5: Dim,
+    D6: Dim,
+{
+    type Smaller = (D2, D3, D4, D5, D6);
+    fn sub_dim(&self) -> Self::Smaller {
+        let (_, d2, d3, d4, d5, d6) = self;
+        (*d2, *d3, *d4, *d5, *d6)
+    }
+}
+
+/// Device-level operations backing [TryUnstack] for tensors.
+pub trait UnstackKernel<E: Dtype>: Storage<E> {
+    /// Splits `inp` into a `Vec` of tensors of shape `S::Smaller`.
+    fn forward<S: Shape>(
+        &self,
+        inp: &Tensor<S, E, Self>,
+    ) -> Result<Vec<Tensor<S::Smaller, E, Self>>, Self::Err>
+    where
+        S: SubDim;
+    /// Propagates the output gradients `grad_out` (one entry per unstacked
+    /// tensor) into the input gradient `grad_inp`.
+    fn backward(
+        &self,
+        grad_inp: &mut Self::Vec,
+        grad_out: Vec<&Self::Vec>,
+    ) -> Result<(), Self::Err>;
+}
+
+/// Unstacks `tensor` with the device kernel and records the matching
+/// backward operation on its tape.
+///
+/// The input's tape is moved onto the first returned tensor; every other
+/// returned tensor receives a default tape.
+fn try_unstack<S, E, D, T>(
+    tensor: Tensor<S, E, D, T>,
+) -> Result<Vec<Tensor<S::Smaller, E, D, T>>, D::Err>
+where
+    S: Shape + SubDim,
+    E: Dtype,
+    D: UnstackKernel<E>,
+    T: Tape<E, D>,
+{
+    let (inp, mut tape): (Tensor<S, E, D>, T) = tensor.split_tape();
+    let dev = inp.device.clone();
+    let outputs = dev.forward(&inp)?;
+
+    // Ghosts are captured by the closure below to look up gradient buffers.
+    let out_ghosts: Vec<_> = outputs.iter().map(|out| out.ghost()).collect();
+    let inp_ghost = inp.ghost();
+    tape.add_backward_op(move |grads| {
+        // Make sure every gradient buffer exists before borrowing them.
+        for ghost in out_ghosts.iter() {
+            grads.try_alloc_for(ghost)?;
+        }
+        grads.try_alloc_for(&inp_ghost)?;
+        let (grad_out, grad_inp) = grads.many_mut_and_ref(&out_ghosts, &inp_ghost);
+        dev.backward(grad_inp, grad_out)
+    });
+
+    let mut outputs = outputs.into_iter();
+    let mut result = Vec::with_capacity(outputs.len());
+    // Only the head carries the recorded tape.
+    if let Some(head) = outputs.next() {
+        result.push(head.put_tape(tape));
+    }
+    result.extend(outputs.map(|rest| rest.put_tape(Default::default())));
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{tensor_ops::*, tests::*};
+
+    /// Unstacking yields one item per leading-dimension entry, each holding
+    /// the matching slice of the source data.
+    #[test]
+    fn test_valid_unstacks() {
+        let dev: TestDevice = Default::default();
+
+        // Rank 1 with a compile-time dimension: items are scalars.
+        {
+            let source: Tensor<Rank1<3>, TestDtype, _> = dev.sample_normal();
+            let items = source.clone().unstack();
+            assert_eq!(items.len(), 3);
+            for (row, item) in items.into_iter().enumerate() {
+                assert_eq!(item.shape(), &());
+                assert_eq!(item.data[0], source.data[row]);
+            }
+        }
+
+        // Rank 2 with compile-time dimensions.
+        {
+            let source: Tensor<Rank2<4, 3>, TestDtype, _> = dev.sample_normal();
+            let items = source.clone().unstack();
+            assert_eq!(items.len(), 4);
+            for (row, item) in items.into_iter().enumerate() {
+                assert_eq!(item.shape(), &(Const::<3>,));
+                for (col, &value) in item.data.iter().enumerate() {
+                    assert_eq!(value, source.data[row * 3 + col]);
+                }
+            }
+        }
+
+        // Rank 2 with runtime dimensions.
+        {
+            let source: Tensor<(usize, usize), TestDtype, _> = dev.sample_normal_like(&(4, 3));
+            let items = source.clone().unstack();
+            assert_eq!(items.len(), 4);
+            for (row, item) in items.into_iter().enumerate() {
+                assert_eq!(item.shape(), &(3,));
+                for (col, &value) in item.data.iter().enumerate() {
+                    assert_eq!(value, source.data[row * 3 + col]);
+                }
+            }
+        }
+    }
+
+    /// Gradients that flow through `unstack` must equal those of the same
+    /// computation performed without unstacking.
+    #[test]
+    fn test_unstack_backwards() {
+        let dev: TestDevice = Default::default();
+
+        let source: Tensor<Rank2<4, 3>, TestDtype, _> = dev.sample_normal();
+
+        // Gradients obtained by unstacking, summing each item, then adding the sums.
+        let via_unstack: Vec<Tensor<Rank1<3>, _, _, _>> = {
+            let total = source
+                .leaky_trace()
+                .exp()
+                .unstack()
+                .into_iter()
+                .map(|item| item.sum())
+                .reduce(|acc, item| acc + item)
+                .unwrap();
+
+            let grads = total.backward();
+            let grad = grads.get(&source);
+
+            (0..4)
+                .map(|row| {
+                    let idx: Tensor<Rank0, usize, _> = dev.tensor(row);
+                    grad.clone().select(idx)
+                })
+                .collect()
+        };
+
+        // Reference gradients from summing the whole tensor directly.
+        let expected: Vec<Tensor<Rank1<3>, _, _, _>> = {
+            let total = source.leaky_trace().exp().sum();
+            let grads = total.backward();
+
+            let grad = grads.get(&source);
+
+            (0..4)
+                .map(|row| {
+                    let idx: Tensor<Rank0, usize, _> = dev.tensor(row);
+                    grad.clone().select(idx)
+                })
+                .collect()
+        };
+
+        for (actual_row, expected_row) in via_unstack.into_iter().zip(expected) {
+            assert_eq!(actual_row.array(), expected_row.array());
+        }
+    }
+}