Reshape changes a tensor's dimensions while preserving the total number of elements. For contiguous tensors, reshape is a free view: only the shape/stride metadata changes. Non-contiguous tensors must first be copied into a contiguous buffer.
The key optimization: avoid that copy whenever the tensor is already contiguous.
```cpp
// Strided tensor view: shape[i] elements along dimension i, strides[i] in elements.
struct TensorView {
    float* data;
    int* shape;
    int* strides;
    int ndim;
};

// A tensor is contiguous when its strides match row-major order: the innermost
// dimension has stride 1, and each outer stride is the product of all inner extents.
bool is_contiguous(const TensorView& t) {
    int expected_stride = 1;
    for (int i = t.ndim - 1; i >= 0; i--) {
        if (t.strides[i] != expected_stride) return false;
        expected_stride *= t.shape[i];
    }
    return true;
}
```
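As a quick illustration (a minimal sketch; the buffers, shapes, and strides below are made up for this example and only `TensorView` and `is_contiguous` come from above): a 2x3 row-major tensor has strides {3, 1} and passes the check, while a transposed view of the same buffer with strides {1, 3} fails it, so reshaping the transpose would trigger a copy.

```cpp
#include <cassert>

int main() {
    float buf[6] = {0, 1, 2, 3, 4, 5};

    int shape_a[2]   = {2, 3};
    int strides_a[2] = {3, 1};             // row-major layout: contiguous
    TensorView a{buf, shape_a, strides_a, 2};

    int shape_b[2]   = {3, 2};
    int strides_b[2] = {1, 3};             // transposed view of the same buffer
    TensorView b{buf, shape_b, strides_b, 2};

    assert(is_contiguous(a));              // reshape would be a free view
    assert(!is_contiguous(b));             // reshape would need a copy first
    return 0;
}
```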
With that check in place, reshape only copies when it has to:

```cpp
TensorView reshape(TensorView& t, int* new_shape, int new_ndim) {
    if (!is_contiguous(t)) {
        // Non-contiguous: materialize a packed copy first.
        t = make_contiguous(t);
    }
    // Contiguous: only the shape/stride metadata changes; the data is shared.
    return {t.data, new_shape, compute_strides(new_shape, new_ndim), new_ndim};
}
```
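The `compute_strides` helper referenced above is not shown in the listing; a minimal sketch of one plausible implementation follows (the raw `new[]` allocation and ownership convention are assumptions for illustration, not the original code):

```cpp
// Hypothetical helper: computes row-major (C-order) strides for a shape.
// The caller owns the returned buffer; a real tensor library would manage
// this allocation differently.
int* compute_strides(const int* shape, int ndim) {
    int* strides = new int[ndim];
    int stride = 1;
    for (int i = ndim - 1; i >= 0; i--) {
        strides[i] = stride;
        stride *= shape[i];
    }
    return strides;
}
```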
Naive approach: unnecessary copy for contiguous tensors.

```cpp
// Naive reshape: always copies the full buffer device-to-device,
// even when the tensor is contiguous and only metadata needs to change.
void reshape_naive(const float* in, float* out, int n) {
    cudaMemcpy(out, in, n * sizeof(float), cudaMemcpyDeviceToDevice);
    // Update shape/stride metadata...
}
```
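For a rough sense of scale (an assumed estimate, not a measurement): a device-to-device copy is bandwidth-bound, so moving on the order of 100 MB at an effective ~1 TB/s costs about 0.1 ms, the magnitude shown in the table below, and the naive version pays that cost on every reshape.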
Optimized approach: zero-copy for contiguous tensors, minimal copy otherwise.

```cpp
#include <vector>

// For contiguous tensors, reshape is just a metadata update on a shared buffer.
class Tensor {
public:
    Tensor(float* data, std::vector<int> shape, std::vector<int> strides)
        : data(data), shape(shape), strides(strides) {}

    Tensor reshape(std::vector<int> new_shape) {
        if (is_contiguous()) {
            // No copy needed - return a new view over the same data.
            return Tensor(data, new_shape, compute_contiguous_strides(new_shape));
        } else {
            // Need a packed copy before the metadata can be reinterpreted.
            Tensor contig = contiguous();
            return contig.reshape(new_shape);
        }
    }

private:
    bool is_contiguous() const;   // stride check, as in is_contiguous() above
    Tensor contiguous() const;    // strided copy into a densely packed buffer
    static std::vector<int> compute_contiguous_strides(const std::vector<int>& shape);

    float* data;
    std::vector<int> shape;
    std::vector<int> strides;
};
```

| Metric | Naive | Optimized | Improvement |
|---|---|---|---|
| Latency (contiguous) | 0.1ms copy | 0μs view | Instant |
| Latency (non-contiguous) | 0.1ms | 0.1ms | Same (required) |
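A usage sketch of the class above (the device buffer, sizes, and the assumption that the declared helpers are implemented elsewhere are all illustrative):

```cpp
#include <cuda_runtime.h>

// Hypothetical usage: a contiguous 2x3 tensor reshaped to 3x2.
// In the contiguous case no data moves; the returned Tensor is a new
// view over the same device buffer with updated shape/stride metadata.
void reshape_example() {
    float* d_buf = nullptr;
    cudaMalloc(&d_buf, 6 * sizeof(float));

    Tensor t(d_buf, /*shape=*/{2, 3}, /*strides=*/{3, 1});
    Tensor view = t.reshape({3, 2});   // metadata-only: effectively free

    cudaFree(d_buf);
}
```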
A copy is only required after an operation such as transpose, slice, or permute has left the tensor non-contiguous; check the strides (as in is_contiguous above) to verify.
Ready to optimize your CUDA code? Download RightNow AI and get real-time performance analysis for your kernels.