# CUDA Implementation

using CUDA;

"""
    make_4d(x)

Reshape `x` into a 4-dimensional array of size `(size(x, 1), 1, 1, 1)`.

The first dimension of `x` is kept and three trailing singleton dimensions are
appended. `reshape` requires the element count to be preserved, so `x` must
have exactly `size(x, 1)` elements (e.g. a vector or a single-column matrix).
"""
function make_4d(x)
    nrows = size(x, 1)
    return reshape(x, nrows, 1, 1, 1)
end;

"""
    tensor(x)

Reshape `x` with `make_4d` and construct a `CuArray` from the result.
"""
function tensor(x)
    reshaped = make_4d(x)
    return CuArray(reshaped)
end;

"""
    gpu_filt(h, x, p=(0, 0), s=(1, 1), d=(1, 1))

Filter `x` with `h` by forwarding to `CUDA.CUDNN.cudnnConvolutionForward`.

# Arguments
- `h`: filter (weight) array, passed as the first positional argument.
- `x`: input array, passed as the second positional argument.
- `p`: forwarded as the `padding` keyword.
- `s`: forwarded as the `stride` keyword.
- `d`: forwarded as the `dilation` keyword.

# Returns
Whatever `cudnnConvolutionForward` returns for the given arguments.

Stride and dilation default to `(1, 1)`. cuDNN requires both to be strictly
positive, so the earlier `(0, 0)` defaults could not produce a valid
convolution descriptor.
"""
function gpu_filt(h, x, p=(0, 0), s=(1, 1), d=(1, 1))
    return CUDA.CUDNN.cudnnConvolutionForward(
        h, x; padding=p, stride=s, dilation=d
    )
end;

"""
    analysis_filterbank(x, LA, HA, n_octaves=10, n_suboctave_bands=32)

Decompose the signal held in `x` into octave bands, then split each band
further into sub-bands stored as columns.

# Arguments
- `x`: indexable collection of tensors; only `x[end]` is split in the octave
  stage. Assumes each element is a 4-D GPU array such as the output of
  `tensor` (TODO confirm against callers).
- `LA`, `HA`: analysis filters handed to `gpu_filt` as the filter argument
  (presumably low-pass and high-pass respectively; verify).
- `n_octaves`: number of times the last element is split in two.
- `n_suboctave_bands`: each of the first `n_octaves + 1` elements is split
  until its second dimension is at least this large.

# Returns
The collection of band tensors. For every split the `HA` output is placed
before the `LA` output.

When `n_octaves == 0` the octave stage does not run and the sub-band stage
writes into the caller's `x` directly.
"""
function analysis_filterbank(x, LA, HA, n_octaves=10, n_suboctave_bands=32)

    # Filter `v` with `h` (padding (1,0), stride 1, dilation 1), then keep the
    # odd-indexed samples of the first dimension, excluding the final sample.
    halfband(h, v) = gpu_filt(h, v, (1,0), 1, 1)[1:2:end-1,:,:,:]

    # Octave stage: replace the last element by its HA and LA halves, so the
    # collection grows by one element per iteration.
    for _ in 1:n_octaves
        x = cat(
            x[1:size(x,1)-1],
            [halfband(HA, x[end])],
            [halfband(LA, x[end])],
            dims=1)
    end

    # Sub-band stage: each pass doubles the column count of a band by placing
    # the HA outputs of all columns before the LA outputs of all columns.
    for octave in 1:(n_octaves+1)
        while size(x[octave], 2) < n_suboctave_bands
            x[octave] = hcat(
                halfband(HA, x[octave]),
                halfband(LA, x[octave])
            )
        end
    end

    return x

end;

"""
    synthesis_filterbank(x, LS, HS, n_octaves=10)

Recombine the bands in `x` into a single signal in two stages: first merge the
columns of each band down to one column, then merge the last two bands
repeatedly until the octave stage has run `n_octaves` times.

# Arguments
- `x`: indexable collection of band tensors. Assumes 4-D GPU arrays with
  sub-bands along the second dimension, the first half of the columns
  belonging to the `HS` branch and the second half to the `LS` branch
  (TODO confirm against the producer of `x`).
- `LS`, `HS`: synthesis filters handed to `gpu_filt` as the filter argument
  (presumably low-pass and high-pass respectively; verify).
- `n_octaves`: number of octave merges; the sub-band stage visits the first
  `n_octaves + 1` elements of `x`.

# Returns
The collection that remains after `n_octaves` merges of the last two elements.

The sub-band stage assigns into `x[octave]`, so the caller's collection is
modified in place.
"""
function synthesis_filterbank(x, LS, HS, n_octaves=10)

    # Sub-band stage: halve the number of columns of each band until one is left.
    for octave in 1:(n_octaves+1)
        while size(x[octave], 2) > 1
            ns = size(x[octave], 1) * 2
            nf = size(x[octave], 2) ÷ 2

            # Upsample by two: copy samples to the odd rows, leave even rows zero.
            y = CUDA.zeros(ns, nf, 1, 1)
            y[1:2:end, :] = x[octave][:, 1:nf]
            tmp = CUDA.zeros(ns, nf, 1, 1)
            tmp[1:2:end, :] = x[octave][:, nf+1:end]

            y = gpu_filt(HS, y, (1,0), 1, 1)[2:end,:,:,:]
            tmp = gpu_filt(LS, tmp, (1,0), 1, 1)[2:end,:,:,:]

            # Both branches contribute to the merged band; storing `y` alone
            # would discard the LS branch computed above.
            x[octave] = y .+ tmp
        end
    end

    # Octave stage: replace the last two bands by their merged band, so the
    # collection shrinks by one element per iteration.
    for _ in 1:n_octaves
        y = cat(
            x[1:size(x,1)-2],
            [tensor(zeros(Float32, 2*size(x[end],1)))],
            dims=1)
        tmp = tensor(zeros(Float32, 2*size(x[end],1)))
        y[end][1:2:end] = x[end-1]
        tmp[1:2:end] = x[end]

        # The operator is kept at the end of the line so the second term is
        # part of the same expression; a line that begins with `.+` would be
        # parsed as a separate statement.
        y[end] = gpu_filt(HS, y[end], (1,0), 1, 1)[2:end,:,:,:] .+
                 gpu_filt(LS, tmp, (1,0), 1, 1)[2:end,:,:,:]
        x = y
    end

    return x

end;