% EXAMPLE_CONV Example of use of software routines for the separation of a
% convolutive mixture with 5cm microphone spacing
%
% The script reads the mixture and reference signals from the 'dev1'
% directory, writes the estimated mixing matrix, sources and source images
% to the 'results' directory, and leaves the evaluation figures returned by
% the bss_eval_* routines in the workspace (MER, SDRs, SIRs, SARs, SDRi,
% ISRi, SIRi, SARi, SDRb1, ..., SDRb2, ...).
%
% NOTE(review): wavread/wavwrite are not available in recent MATLAB
% releases; audioread/audiowrite would be needed there.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright 2008 Emmanuel Vincent
% This software is distributed under the terms of the GNU Public License
% version 3 (http://www.gnu.org/licenses/gpl.txt)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Signal parameters shared by every step below
fs=16000;       % sampling frequency passed to wavwrite (Hz)
nsampl=160000;  % length in samples of all evaluated/written signals

% Input time-frequency representation
x=wavread('dev1/dev1_nodrums_synthconv_250ms_5cm_mix.wav').';
X=stft_multi(x);

% Manual setting of the number of sources
nsrc=3;

% Estimation of the frequency-dependent mixing matrix
% X holds wlen/2+1 bins along its first dimension; the remaining bins are
% rebuilt by conjugate symmetry before calling the estimator, and only the
% first wlen/2+1 bins of the estimate are kept afterwards.
wlen=2*(size(X,1)-1);
Ae=estmix_conv_5cm([X;conj(X(wlen/2:-1:2,:,:))],nsrc);
Ae=Ae(:,:,1:wlen/2+1);

% Evaluation of the estimated frequency-dependent mixing matrix
% The .mat file is expected to define the variable A, indexed below as
% 2 x nsrc x (number of taps).
load('dev1/dev1_nodrums_synthconv_250ms_5cm_filt.mat');
% Zero-pad A along its third dimension to a multiple of wlen, cut it into
% blocks of wlen taps and sum the blocks, so that a wlen-point FFT can be
% applied along that dimension.
nblk=ceil(size(A,3)/wlen);
A=cat(3,A,zeros(2,nsrc,wlen*nblk-size(A,3)));
A=sum(reshape(A,2,nsrc,wlen,nblk),4);
A=fft(A,[],3);
A=A(:,:,1:wlen/2+1);
[MER,perm]=bss_eval_mix(Ae,A);
% Create the output directory only when missing (mkdir warns otherwise)
if ~exist('results','dir')
    mkdir('results');
end
save('results/dev1_nodrums_synthconv_250ms_5cm_filt.mat','Ae');

% Source separation via binary masking
S=sep_binmask(X,Ae);
se=istft_multi(S,nsampl);
for j=1:nsrc
    wavwrite(se(j,:).',fs,['results/dev1_nodrums_synthconv_250ms_5cm_src_' int2str(j) '.wav']);
end

% Evaluation of the estimated sources
% Only samples 16001 to 176000 (nsampl samples) of each reference source
% file are read.
s=zeros(nsrc,nsampl);
for j=1:nsrc
    s(j,:)=wavread(['dev1/dev1_nodrums_src_' int2str(j) '.wav'],[16001 176000]).';
end
[SDRs,SIRs,SARs,perms]=bss_eval_sources(se,s);

% Computation of the spatial source images
I=src_image(S,Ae);
ie=istft_multi(I,nsampl);
for j=1:nsrc
    wavwrite(reshape(ie(j,:,:),nsampl,2),fs,['results/dev1_nodrums_synthconv_250ms_5cm_sim_' int2str(j) '.wav']);
end

% Evaluation of the estimated source images
% NOTE: the variable name i shadows the imaginary unit for the remainder of
% the script; it is kept so that the workspace variables are unchanged.
i=zeros(nsrc,nsampl,2);
for j=1:nsrc
    i(j,:,:)=reshape(wavread(['dev1/dev1_nodrums_synthconv_250ms_5cm_sim_' int2str(j) '.wav']),1,nsampl,2);
end
[SDRi,ISRi,SIRi,SARi,permi]=bss_eval_images(ie,i);

% Ideal binary masking benchmark (STFT)
b1=sep_ibm(x,i);
[SDRb1,ISRb1,SIRb1,SARb1]=bss_eval_images_nosort(b1,i);

% Ideal binary masking benchmark (Cochleagram)
% For each source, the mask is generated from its own image and the sum of
% the images of all the other sources. The loop runs over nsrc sources so
% that it never indexes past the rows of i.
b2=zeros(nsrc,nsampl,2);
for j=1:nsrc
    others=[1:j-1 j+1:nsrc];
    ibm=generateIBM(reshape(i(j,:,:),nsampl,2),reshape(sum(i(others,:,:),1),nsampl,2));
    b2(j,:,:)=generateEstimate(x.',ibm);
end
[SDRb2,ISRb2,SIRb2,SARb2]=bss_eval_images_nosort(b2,i);