diff --git a/Level_1/AACoder1.m b/Level_1/AACoder1.m new file mode 100644 index 0000000..d3de782 --- /dev/null +++ b/Level_1/AACoder1.m @@ -0,0 +1,57 @@ +function AACSeq1 = AACoder1(fNameIn) +%Implementation of WHAT?? //TODO!! +% Usage AACSeq1 = AACoder1(fNameIn), where: +% Inputs +% - fNameIn is the filename and path of the file to encode +% +% Output +% - AACSeq1 is an array of structs containing K structs, where K is +% the number of computed frames. Every struct of the array consists +% of a frameType, a winType, chl.frameF which are the MDCT +% coefficients of this frame's left channel, chr.frameF which are +% the MDCT coefficients of this frame's right channel + + % Reads the audio file + [originalAudioData, ~] = audioread(fNameIn); + + % Splits the audio in frames and determines the type of each frame + frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS'; + frameTypes{1} = 'OLS'; + for i = 1:length(frameTypes) - 2 + nextFrameStart = (i + 1) * 1024 + 1; + nextFrameStop = nextFrameStart + 2047; + frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i}); + end + + % Assignes a type to the last frame + if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS') + frameTypes{length(frameTypes)} = 'ESH'; + elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH') + frameTypes{length(frameTypes)} = 'ESH'; + else + frameTypes{length(frameTypes)} = 'OLS'; + end + + AACSeq1(length(frameTypes)) = struct; + for i = 0:length(frameTypes)-1 + currFrameStart = i * 1024 + 1; + currFrameStop = currFrameStart + 2047; + frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, 'SIN'); + + AACSeq1(i + 1).frameType = frameTypes(i + 1); + AACSeq1(i + 1).winType = 'KBD'; + AACSeq1(i + 1).chl.frameF = frameF(:, 1); + AACSeq1(i + 1).chr.frameF = frameF(:, 2); + end + + if true + [idx,label] = grp2idx(sort(frameTypes)); + hist(idx,unique(idx)); + set(gca,'xTickLabel',label) + + sum(idx(:) == 1) + sum(idx(:) == 2) + sum(idx(:) == 3) + sum(idx(:) == 4) + end +end diff --git a/Level_1/LicorDeCalandraca.wav b/Level_1/LicorDeCalandraca.wav new file mode 100644 index 0000000..a527e8c Binary files /dev/null and b/Level_1/LicorDeCalandraca.wav differ diff --git a/Level_1/SSC.m b/Level_1/SSC.m new file mode 100644 index 0000000..db5d485 --- /dev/null +++ b/Level_1/SSC.m @@ -0,0 +1,82 @@ +function frameType = SSC(~, nextFrameT, prevFrameType) +%Implementation of the SSC step +% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where: +% Inputs +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 +% - nextFrameT is the next frame in the time domain, containing both +% channels of the audio stored in an array of dimensions 2048X2 +% - prevFrameType is the type of the previous frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% +% Output +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) + + % Examines the cases where the determination of the type of the next + % frame isn't needed for + if strcmp(prevFrameType, 'LSS') + frameType = 'ESH'; + return; + elseif strcmp(prevFrameType, 'LPS') + frameType = 'OLS'; + return; + end + + % Determines the type of the next frame + % Filters frame + nextFrameT = filter([0.7548, -0.7548], [1, -0.5095], nextFrameT, [], 2); + + channelFrameType = {'OLS', 'OLS'}; + + for channel = 1:2 + % Calculates sub-frame energy estimation + [subFrames, ~] = buffer(nextFrameT(449:end-448, channel), 256, 128, 'nodelay'); + energyEstimations = sum(subFrames.^2, 1); + + % Calculates the ratio of the sub-frame energy to the average energy of + % the previous sub-frames + nextIsESH = 0; + for subFrameIndex = 1:8 + energyRatio = energyEstimations(subFrameIndex) / ... + mean(energyEstimations(1:subFrameIndex-1)); + + if (energyEstimations(subFrameIndex) > 10^(-3)) && (energyRatio > 10) + nextIsESH = 1; + break; + end + end + + if nextIsESH == 1 + if strcmp(prevFrameType, 'ESH') + % This frame of this channel is an EIGHT_SHORT_SEQUENCE type + % frame. This means the frames of both channels will be encoded + % as EIGHT_SHORT_SEQUENCE type frames. + frameType = 'ESH'; + return; + elseif strcmp(prevFrameType, 'OLS') + channelFrameType{channel} = 'LSS'; + end + else + if strcmp(prevFrameType, 'ESH') + channelFrameType{channel} = 'LPS'; + elseif strcmp(prevFrameType, 'OLS') + channelFrameType{channel} = 'OLS'; + end + end + end + + if strcmp(channelFrameType{1}, 'OLS') + frameType = channelFrameType{2}; + elseif strcmp(channelFrameType{2}, 'OLS') + frameType = channelFrameType{1}; + elseif strcmp(channelFrameType{1}, channelFrameType{2}) + frameType = channelFrameType{1}; + else + frameType = 'ESH'; + end +end diff --git a/Level_1/demoAAC1.m b/Level_1/demoAAC1.m new file mode 100644 index 0000000..38cba32 --- /dev/null +++ b/Level_1/demoAAC1.m @@ -0,0 +1,19 @@ +function SNR = demoAAC1(fNameIn, fNameOut) +%Implementation of WHAT?? //TODO!! +% Usage SNR = demoAAC1(fNameIn, fNameOut), where: +% Inputs +% - fNameIn is the filename and path of the file to encode +% - fNameOut is the filename and path of the file that will be +% written after decoding +% +% Output +% - SNR is the signal to noise ration computed after successively +% encoding and decoding the audio signal + + AACSeq1 = AACoder1(fNameIn); + decodedAudio = iAACoder1(AACSeq1, fNameOut); + + [audioData, ~] = audioread(fNameIn); + SNR = sum(10*log10((sum(audioData(1:length(decodedAudio), :)) .^ 2)./ ... + (sum(audioData(1:length(decodedAudio), :) - decodedAudio) .^ 2))); +end diff --git a/Level_1/filterbank.m b/Level_1/filterbank.m new file mode 100644 index 0000000..f1a1f7b --- /dev/null +++ b/Level_1/filterbank.m @@ -0,0 +1,103 @@ +function frameF = filterbank(frameT, frameType, winType) +%Implementation of the Filter Bank step +% Usage frameF = filterbank(frameT, frameType, winType), where: +% Inputs +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - winType is the type of the window selected, can be one of "KBD", +% "SIN" +% +% Output +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 + + % Declares persistent windows variables and initializes if empty + persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; + if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... + isempty(sinWindowLong) || isempty(sinWindowShort) + kaiserLong = kaiser(1024, 6*pi); + kaiserSumLong = sum(kaiserLong); + kaiserShort = kaiser(128, 4*pi); + kaiserSumShort = sum(kaiserShort); + + for n = 1:1024 + kaiserWindowLong(n) = sqrt(sum(kaiserLong(1:n))/kaiserSumLong); + kaiserWindowLong(1024 + n) = sqrt(sum(kaiserLong(1:end-n+1))/kaiserSumLong); + + sinWindowLong(n) = sin(pi*(n + 0.5)/2048); + sinWindowLong(1024 + n) = sin(pi*(1024 + n + 0.5)/2048); + + end + + for n = 1:128 + kaiserWindowShort(n) = sqrt(sum(kaiserShort(1:n))/kaiserSumShort); + kaiserWindowShort(128 + n) = sqrt(sum(kaiserShort(1:end-n+1))/kaiserSumShort); + + sinWindowShort(n) = sin(pi*(n + 0.5)/256); + sinWindowShort(128 + n) = sin(pi*(128 + n + 0.5)/256); + end + end + + frameF(1024, 2) = 0; + + % Applies appropriate window to the frame + for channel=1:2 + if strcmp(frameType, 'OLS') + if strcmp(winType, 'KBD') + frameT(:, channel) = frameT(:, channel) .* kaiserWindowLong(:); + elseif strcmp(winType, 'SIN') + frameT(:, channel) = frameT(:, channel) .* sinWindowLong(:); + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'LSS') + if strcmp(winType, 'KBD') + frameT(1:1024, channel) = frameT(1:1024, channel) .* kaiserWindowLong(1:1024)'; + frameT(1473:1600, channel) = frameT(1473:1600, channel) .* kaiserWindowShort(129:end)'; + frameT(1601:end, channel) = 0; + elseif strcmp(winType, 'SIN') + frameT(1:1024, channel) = frameT(1:1024, channel) .* sinWindowLong(1:1024)'; + frameT(1473:1600, channel) = frameT(1473:1600, channel) .* sinWindowShort(129:end)'; + frameT(1601:end, channel) = 0; + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'LPS') + if strcmp(winType, 'KBD') + frameT(1:448, channel) = 0; + frameT(449:576, channel) = frameT(449:576, channel) .* kaiserWindowShort(1:128)'; + frameT(1025:end, channel) = frameT(1025:end, channel) .* kaiserWindowLong(1025:end)'; + elseif strcmp(winType, 'SIN') + frameT(1:448, channel) = 0; + frameT(449:576, channel) = frameT(449:576, channel) .* sinWindowShort(1:128)'; + frameT(1025:end, channel) = frameT(1025:end, channel) .* sinWindowLong(1025:end)'; + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'ESH') + % Splits the frame into sub-frames + [subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay'); + + if strcmp(winType, 'KBD') + subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]); + elseif strcmp(winType, 'SIN') + subFrames = subFrames .* repmat(sinWindowShort', [1 8]); + end + + for subFrameIndex = 1:8 + frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = mdct4(subFrames(:, subFrameIndex)); + end + end + end +end diff --git a/Level_1/iAACoder1.m b/Level_1/iAACoder1.m new file mode 100644 index 0000000..a1ee75d --- /dev/null +++ b/Level_1/iAACoder1.m @@ -0,0 +1,30 @@ +function x = iAACoder1(AACSeq1, fNameOut) +%Implementation of WHAT?? //TODO!! +% Usage x = iAACoder1(AACSeq1, fNameOut), where: +% Inputs +% - fNameOut is the filename and path of the file that will be +% written after decoding +% - AACSeq1 is an array of structs containing K structs, where K is +% the number of computed frames. Every struct of the array consists +% of a frameType, a winType, chl.frameF which are the MDCT +% coefficients of this frame's left channel, chr.frameF which are +% the MDCT coefficients of this frame's right channel +% +% Output +% - x is an array containing the decoded audio samples + + decodedAudio(1024 * (length(AACSeq1) + 1), 2) = 0; + frameF(1024, 2) = 0; + for i = 0:length(AACSeq1)-1 + currFrameStart = i * 1024 + 1; + currFrameStop = currFrameStart + 2047; + frameF(:, 1) = AACSeq1(i+1).chl.frameF; + frameF(:, 2) = AACSeq1(i+1).chr.frameF; + frameT = iFilterbank(frameF, AACSeq1(i+1).frameType, AACSeq1(i+1).winType); + + decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT; + end + + audiowrite(fNameOut, decodedAudio, 48000); + x = decodedAudio; +end diff --git a/Level_1/iFilterbank.m b/Level_1/iFilterbank.m new file mode 100644 index 0000000..ad812fd --- /dev/null +++ b/Level_1/iFilterbank.m @@ -0,0 +1,103 @@ +function frameT = iFilterbank(frameF, frameType, winType) +%Implementation of the Inverse Filter Bank step +% Usage frameT = iFilterbank(frameF, frameType, winType), where: +% Inputs +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - winType is the type of the window selected, can be one of "KBD", +% "SIN" +% +% Output +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 + + % Declares persistent windows variables and initializes if empty + persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; + if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... + isempty(sinWindowLong) || isempty(sinWindowShort) + kaiserLong = kaiser(1024, 6*pi); + kaiserSumLong = sum(kaiserLong); + kaiserShort = kaiser(128, 4*pi); + kaiserSumShort = sum(kaiserShort); + + for n = 1:1024 + kaiserWindowLong(n) = sqrt(sum(kaiserLong(1:n))/kaiserSumLong); + kaiserWindowLong(1024 + n) = sqrt(sum(kaiserLong(1:end-n+1))/kaiserSumLong); + + sinWindowLong(n) = sin(pi*(n + 0.5)/2048); + sinWindowLong(1024 + n) = sin(pi*(1024 + n + 0.5)/2048); + + end + + for n = 1:128 + kaiserWindowShort(n) = sqrt(sum(kaiserShort(1:n))/kaiserSumShort); + kaiserWindowShort(128 + n) = sqrt(sum(kaiserShort(1:end-n+1))/kaiserSumShort); + + sinWindowShort(n) = sin(pi*(n + 0.5)/256); + sinWindowShort(128 + n) = sin(pi*(128 + n + 0.5)/256); + end + end + + frameT(2048, 2) = 0; + + % Applies appropriate window to the frame + for channel=1:2 + if strcmp(frameType, 'OLS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT(:, channel) = frameT(:, channel) .* kaiserWindowLong(:); + elseif strcmp(winType, 'SIN') + frameT(:, channel) = frameT(:, channel) .* sinWindowLong(:); + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'LSS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT(1:1024, channel) = frameT(1:1024, channel) .* kaiserWindowLong(1:1024)'; + frameT(1473:1600, channel) = frameT(1473:1600, channel) .* kaiserWindowShort(129:end)'; + frameT(1601:end, channel) = 0; + elseif strcmp(winType, 'SIN') + frameT(1:1024, channel) = frameT(1:1024, channel) .* sinWindowLong(1:1024)'; + frameT(1473:1600, channel) = frameT(1473:1600, channel) .* sinWindowShort(129:end)'; + frameT(1601:end, channel) = 0; + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'LPS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT(1:448, channel) = 0; + frameT(449:576, channel) = frameT(449:576, channel) .* kaiserWindowShort(1:128)'; + frameT(1025:end, channel) = frameT(1025:end, channel) .* kaiserWindowLong(1025:end)'; + elseif strcmp(winType, 'SIN') + frameT(1:448, channel) = 0; + frameT(449:576, channel) = frameT(449:576, channel) .* sinWindowShort(1:128)'; + frameT(1025:end, channel) = frameT(1025:end, channel) .* sinWindowLong(1025:end)'; + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'ESH') + for subFrameIndex = 1:8 + subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel)); + + if strcmp(winType, 'KBD') + subFrame = subFrame .* kaiserWindowShort'; + elseif strcmp(winType, 'SIN') + subFrame = subFrame .* sinWindowShort'; + end + + frameT(449 + (subFrameIndex - 1) * 128 + 1:449 + (subFrameIndex + 1) * 128) = ... + frameT(449 + (subFrameIndex - 1) * 128 + 1:449 + (subFrameIndex + 1) * 128) + subFrame'; + end + end + end +end diff --git a/Level_1/imdct4.m b/Level_1/imdct4.m new file mode 100644 index 0000000..e81887c --- /dev/null +++ b/Level_1/imdct4.m @@ -0,0 +1,61 @@ + +function y = imdct4(x) +% IMDCT4 Calculates the Modified Discrete Cosine Transform +% y = imdct4(x) +% +% x: input signal (can be either a column or frame per column) +% y: IMDCT of x +% +% Vectorize ! ! ! + +% ------- imdct4.m ----------------------------------------- +% Marios Athineos, marios@ee.columbia.edu +% http://www.ee.columbia.edu/~marios/ +% Copyright (c) 2002 by Columbia University. +% All rights reserved. +% ---------------------------------------------------------- + +[flen,fnum] = size(x); +% Make column if it's a single row +if (flen==1) + x = x(:); + flen = fnum; + fnum = 1; +end + +% We need these for furmulas below +N = flen; +M = N/2; +twoN = 2*N; +sqrtN = sqrt(twoN); + +% We need this twice so keep it around +t = (0:(M-1)).'; +w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN))); + +% Pre-twiddle +t = (0:(M-1)).'; +c = x(2*t+1,:) + j*x(N-1-2*t+1,:); +c = (0.5*w)*c; + +% FFT for N/2 points only !!! +c = fft(c,M); + +% Post-twiddle +c = ((8/sqrtN)*w)*c; + +% Preallocate rotation matrix +rot = zeros(twoN,fnum); + +% Sort +t = (0:(M-1)).'; +rot(2*t+1,:) = real(c(t+1,:)); +rot(N+2*t+1,:) = imag(c(t+1,:)); +t = (1:2:(twoN-1)).'; +rot(t+1,:) = -rot(twoN-1-t+1,:); + +% Shift +t = (0:(3*M-1)).'; +y(t+1,:) = rot(t+M+1,:); +t = (3*M:(twoN-1)).'; +y(t+1,:) = -rot(t-3*M+1,:); \ No newline at end of file diff --git a/Level_1/mdct4.m b/Level_1/mdct4.m new file mode 100644 index 0000000..37c8f11 --- /dev/null +++ b/Level_1/mdct4.m @@ -0,0 +1,75 @@ + +function y = mdct4(x) +% MDCT4 Calculates the Modified Discrete Cosine Transform +% y = mdct4(x) +% +% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with +% 50% overlap for perfect TDAC reconstruction. +% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full +% matrix (N) of coefs is: yf = [y;-flipud(y)]; +% +% x: input signal (can be either a column or frame per column) +% length of x must be a integer multiple of 4 (each frame) +% y: MDCT of x (coefs are divided by sqrt(N)) +% +% Vectorize ! ! ! + +% ------- mdct4.m ------------------------------------------ +% Marios Athineos, marios@ee.columbia.edu +% http://www.ee.columbia.edu/~marios/ +% Copyright (c) 2002 by Columbia University. +% All rights reserved. +% ---------------------------------------------------------- + +[flen,fnum] = size(x); +% Make column if it's a single row +if (flen==1) + x = x(:); + flen = fnum; + fnum = 1; +end +% Make sure length is multiple of 4 +if (rem(flen,4)~=0) + error('MDCT4 defined for lengths multiple of four.'); +end + +% We need these for furmulas below +N = flen; % Length of window +M = N/2; % Number of coefficients +N4 = N/4; % Simplify the way eqs look +sqrtN = sqrt(N); + +% Preallocate rotation matrix +% It would be nice to be able to do it in-place but we cannot +% cause of the prerotation. +rot = zeros(flen,fnum); + +% Shift +t = (0:(N4-1)).'; +rot(t+1,:) = -x(t+3*N4+1,:); +t = (N4:(N-1)).'; +rot(t+1,:) = x(t-N4+1,:); +clear x; + +% We need this twice so keep it around +t = (0:(N4-1)).'; +w = diag(sparse(exp(-j*2*pi*(t+1/8)/N))); + +% Pre-twiddle +t = (0:(N4-1)).'; +c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))... + -j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:)); +% This is a really cool Matlab trick ;) +c = 0.5*w*c; +clear rot; + +% FFT for N/4 points only !!! +c = fft(c,N4); + +% Post-twiddle +c = (2/sqrtN)*w*c; + +% Sort +t = (0:(N4-1)).'; +y(2*t+1,:) = real(c(t+1,:)); +y(M-1-2*t+1,:) = -imag(c(t+1,:)); \ No newline at end of file