diff --git a/Level_3/AACoder3.m b/Level_3/AACoder3.m new file mode 100644 index 0000000..bfc0156 --- /dev/null +++ b/Level_3/AACoder3.m @@ -0,0 +1,123 @@ +function AACSeq3 = AACoder3(fNameIn, fnameAACoded) +%Implementation of AAC encoder +% Usage AACSeq3 = AACoder3(fNameIn, fnameAACoded), where: +% Inputs +% - fNameIn is the filename and path of the file to encode +% - frameAACoded is the filename and path of the mat file that will +% be written after encoding +% +% Output +% - AACSeq3 is an array of structs containing K structs, where K is +% the number of computed frames. Every struct of the array consists +% of: +% * a frameType, +% * a winType, +% * chl.TNScoeffs which are the quantized TNS coefficients of +% this frame's left channel, +% * chr.TNScoeffs which are the quantized TNS coefficients of +% this frame's right channel, +% * chl.T which are the psychoacoustic thresholds of this frame's +% left channel, +% * chr.T which are the psychoacoustic thresholds of this frame's +% right channel, +% * chl.G which are the quantized global gains of this frame's +% left channel, +% * chr.G which are the quantized global gains of this frame's +% right channel, +% * chl.sfc which is the Huffman encoded sfc sequence of this +% frame's left channel, +% * chr.sfc which is the Huffman encoded sfc sequence of this +% frame's right channel, +% * chl.stream which is the Huffman encoded quantized MDCT +% sequence of this frame's left channel, +% * chr.stream which is the Huffman encoded quantized MDCT +% sequence of this frame's right channel, +% * chl.codebook which is the Huffman codebook used for this +% frame's left channel +% * chr.codebook which is the Huffman codebook used for this +% frame's right channel + + % Declares constant window type + WINDOW_TYPE = 'SIN'; + + % Reads the audio file + [originalAudioData, ~] = audioread(fNameIn); + + % Splits the audio in frames and determines the type of each frame + frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS'; + frameTypes{1} = 'OLS'; + for i = 1:length(frameTypes) - 2 + nextFrameStart = (i + 1) * 1024 + 1; + nextFrameStop = nextFrameStart + 2047; + frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i}); + end + + % Assigns a type to the last frame + if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS') + frameTypes{length(frameTypes)} = 'ESH'; + elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH') + frameTypes{length(frameTypes)} = 'ESH'; + else + frameTypes{length(frameTypes)} = 'OLS'; + end + + % Encodes audio file + huffLUT = loadLUT(); + AACSeq3(length(frameTypes)) = struct; + for i = 0:length(frameTypes) - 1 + currFrameStart = i * 1024 + 1; + currFrameStop = currFrameStart + 2047; + frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, WINDOW_TYPE); + [frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameTypes{i+1}); + [frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameTypes{i+1}); + + if i < 2 + % TODO: what happens on the first two frames? + else + prev1FrameStart = (i - 1) * 1024 + 1; + prev1FrameStop = prev1FrameStart + 2047; + prev2FrameStart = (i - 2) * 1024 + 1; + prev2FrameStop = prev1FrameStart + 2047; + SMR = psycho(... + originalAudioData(currFrameStart:currFrameStop, :), ... + frameTypes{i+1}, ... + originalAudioData(prev1FrameStart:prev1FrameStop, :), ... + originalAudioData(prev2FrameStart:prev2FrameStop, :)); + [SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMR); + [SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMR); + end + + [streamL, huffcodebookL] = encodeHuff(SL, huffLUT); + [streamR, huffcodebookR] = encodeHuff(SR, huffLUT); + [sfcL, ~] = encodeHuff(sfcL, huffLUT, 12); + [sfcR, ~] = encodeHuff(sfcR, huffLUT, 12); + + AACSeq3(i + 1).frameType = frameTypes(i + 1); + AACSeq3(i + 1).winType = WINDOW_TYPE; + AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL; + AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR; + AACSeq3(i + 1).chl.T = what; % TODO: find dis shit + AACSeq3(i + 1).chr.T = what; + AACSeq3(i + 1).chl.G = GL; + AACSeq3(i + 1).chr.G = GR; + AACSeq3(i + 1).chl.sfc = sfcL; + AACSeq3(i + 1).chr.sfc = sfcR; + AACSeq3(i + 1).chl.stream = streamL; + AACSeq3(i + 1).chr.stream = streamR; + AACSeq3(i + 1).chl.codebook = huffcodebookL; + AACSeq3(i + 1).chr.codebook = huffcodebookR; + end + + save(fnameAACoded,AACSeq3); + + if false + [idx,label] = grp2idx(sort(frameTypes)); + hist(idx,unique(idx)); + set(gca,'xTickLabel',label) + + sum(idx(:) == 1) + sum(idx(:) == 2) + sum(idx(:) == 3) + sum(idx(:) == 4) + end +end diff --git a/Level_3/AACquantizer.m b/Level_3/AACquantizer.m new file mode 100644 index 0000000..8cb4af4 --- /dev/null +++ b/Level_3/AACquantizer.m @@ -0,0 +1,26 @@ +function [S, sfc, G] = AACquantizer(frameF, frameType, SMR) +%Implementation of Quantizer +% Usage [S, sfc, G] = AACquantizer(frameF, frameType, SMR), where: +% Inputs +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing only one of the audio channels stored in +% a vector of length 1024 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - SMR is the signal to mask ratio array of dimensions 42X8 for +% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise +% +% Output +% - S are the MDCT quantization symbols of one audio channel stored +% in a vector of length 1024 +% - sfc are the scalefactors per band stored in an array of +% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 +% otherwise, where NB is the number of bands +% - G is the global gain stored in an array of dimensions 1X8 for +% EIGHT_SHORT_SEQUENCE frames and a single value otherwise + + +end + diff --git a/Level_3/LicorDeCalandraca.wav b/Level_3/LicorDeCalandraca.wav new file mode 100644 index 0000000..a527e8c Binary files /dev/null and b/Level_3/LicorDeCalandraca.wav differ diff --git a/Level_3/SSC.m b/Level_3/SSC.m new file mode 100644 index 0000000..e0f4502 --- /dev/null +++ b/Level_3/SSC.m @@ -0,0 +1,78 @@ +function frameType = SSC(~, nextFrameT, prevFrameType) +%Implementation of the Sequence Segmentation Control step +% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where: +% Inputs +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 +% - nextFrameT is the next frame in the time domain, containing both +% channels of the audio stored in an array of dimensions 2048X2 +% - prevFrameType is the type of the previous frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% +% Output +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) + + % Examines the cases where the determination of the type of the next + % frame isn't needed for + if strcmp(prevFrameType, 'LSS') + frameType = 'ESH'; + return; + elseif strcmp(prevFrameType, 'LPS') + frameType = 'OLS'; + return; + end + + % Determines the type of the next frame + % Filters frame + nextFrameT = filter([0.7548 -0.7548], [1 -0.5095], nextFrameT, [], 1); + channelFrameType = {'nan', 'nan'}; + for channel = 1:2 + % Calculates sub-frame energy estimation + [subFrames, ~] = buffer(nextFrameT(577:end - 448, channel), 128, 0, 'nodelay'); + energyEstimations = sum(subFrames .^ 2, 1); + + % Calculates the ratio of the sub-frame energy to the average energy of + % the previous sub-frames + energyRatios = movmean(energyEstimations, [8 0]); + energyRatios = energyEstimations ./ [energyEstimations(1) energyRatios(1:end-1)]; + + if ~isempty(find(energyEstimations > 10^(-3), 1)) && ... + ~isempty(find(energyRatios(energyEstimations > 10^(-3)) > 10, 1)) + % Next frame is ESH + if strcmp(prevFrameType, 'ESH') + % This frame of this channel is an EIGHT_SHORT_SEQUENCE type + % frame. This means the frames of both channels will be encoded + % as EIGHT_SHORT_SEQUENCE type frames. + frameType = 'ESH'; + return; + elseif strcmp(prevFrameType, 'OLS') + channelFrameType{channel} = 'LSS'; + end + else + if strcmp(prevFrameType, 'ESH') + channelFrameType{channel} = 'LPS'; + elseif strcmp(prevFrameType, 'OLS') + channelFrameType{channel} = 'OLS'; + end + end + end + + % Joins decision for both channels + if strcmp(channelFrameType{1}, 'nan') || strcmp(channelFrameType{2}, 'nan') + error('SSC, l[73]: Internal error occurred!') + end + if strcmp(channelFrameType{1}, 'OLS') + frameType = channelFrameType{2}; + elseif strcmp(channelFrameType{2}, 'OLS') + frameType = channelFrameType{1}; + elseif strcmp(channelFrameType{1}, channelFrameType{2}) + frameType = channelFrameType{1}; + else + frameType = 'ESH'; + end +end diff --git a/Level_3/TNS.m b/Level_3/TNS.m new file mode 100644 index 0000000..672c621 --- /dev/null +++ b/Level_3/TNS.m @@ -0,0 +1,119 @@ +function [frameFout, TNScoeffs] = TNS(frameFin, frameType) +%Implementation of the TNS step +% Usage [frameFout, TNScoeffs] = TNS(frameFin, frameType), where: +% Inputs +% - frameFin is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% +% Output +% - frameFout is the frame in the frequency domain after Temporal Noise +% Shaping, in MDCT coefficients representation containing both channels +% of the audio stored in an array of dimensions 1024X2 +% - TNScoeffs is the quantized TNS coefficients array of dimensions +% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise + + % Declares constant numbers of bands for long and short windows + LONG_WINDOW_NUMBER_OF_BANDS = 69; + SHORT_WINDOW_NUMBER_OF_BANDS = 42; + % Declares constant order of the linear prediction filter + LPF_ORDER = 4; + % Declares constant coefficients' resolution, in the form of number of + % decimal digits used + COEF_RES = 1; + + % Declares persistent variable holding the TNS tables and initializes if empty + persistent TNSTables; + if isempty(TNSTables) + TNSTables = load('TableB219.mat'); + end + + if ~strcmp(frameType, 'ESH') + % Calculates the energy per band + bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0; + for band = 1:LONG_WINDOW_NUMBER_OF_BANDS - 1 + bandEnergy(band) = sumsqr(frameFin(TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band + 1, 2))); + end + bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = sumsqr(frameFin( ... + TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); + + % Calculates the normalization factors + bandIndices = quantiz(0:1023, TNSTables.B219a(:, 2) - 1); + normalizationFactor = sqrt(bandEnergy(bandIndices)); + + % Smooths normalization factors + for normIndex = 1023:-1:1 + normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; + end + for normIndex = 2:1024 + normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; + end + + % Normalizes MDCT coefficients according to each band energy + normalizedFrameFin = frameFin ./ normalizationFactor'; + + % Calculates the linear prediction coefficients + [linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); + + % Quantizes these coefficients + quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); + + % Filters MDCT coefficients + if ~isstable(1, quantizedLinPredCoeff) + error('TNS, l[79]: Inverse filter not stable!'); + else + TNScoeffs = quantizedLinPredCoeff(2:end); + frameFout = filter(quantizedLinPredCoeff, 1, frameFin); + end + else + % Initializes output vectors + TNScoeffs(LPF_ORDER * 8) = 0; + frameFout(1024) = 0; + + bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = 0; + for subFrameIndex = 1:8 + subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); + + % Calculates the energy per band + for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS - 1 + bandEnergy(band) = sumsqr(subFrame(TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band + 1, 2))); + end + bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = sumsqr(subFrame( ... + TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); + + % Calculates the normalization factors + bandIndices = quantiz(0:127, TNSTables.B219b(:, 2) - 1); + normalizationFactor = sqrt(bandEnergy(bandIndices)); + + % Smooths normalization factors + for normIndex = 127:-1:1 + normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; + end + for normIndex = 2:128 + normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; + end + + % Normalizes MDCT coefficients according to each band energy + normalizedFrameFin = subFrame ./ normalizationFactor'; + + % Calculates the linear prediction coefficients + [linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); + + % Quantizes these coefficients + quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); + + % Filters MDCT coefficients + if ~isstable(1, quantizedLinPredCoeff) + error('TNS, l[79]: Inverse filter not stable!'); + else + TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4) = quantizedLinPredCoeff(2:end); + frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... + filter(quantizedLinPredCoeff, 1, subFrame); + end + end + end +end diff --git a/Level_3/TableB219.mat b/Level_3/TableB219.mat new file mode 100644 index 0000000..7b2e403 Binary files /dev/null and b/Level_3/TableB219.mat differ diff --git a/Level_3/decodeHuff.m b/Level_3/decodeHuff.m new file mode 100644 index 0000000..a0de31b --- /dev/null +++ b/Level_3/decodeHuff.m @@ -0,0 +1,82 @@ +function decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) +% DECODEHUFF Huffman decoder stage +% +% decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) performs huffman +% decoding, where huffSec is a string of '1' and '0' corresponding to the +% Huffman encoded stream, and huffCodebook is the index (0 to 12) of the +% codebook used, as outputted by encodeHuff. huffLUT is the Huffman +% look-up tables to be loaded using loadLUT.m. The output decCoeffs is +% the decoded quantised (integer) values. +% +% CAUTION: due to zero padding the length of decCoeffs may be larger than +% the length of the encoded sequence. Simply ignore values (they should +% be equal to zero) that are outside the index range +% [1:length(encoded sequence)]. + +huffLUTi = huffLUT{huffCodebook}; +h=huffLUTi.invTable; +huffCodebook=huffLUTi.codebook; +nTupleSize=huffLUTi.nTupleSize; +maxAbsCodeVal=huffLUTi.maxAbsCodeVal; +signedValues=huffLUTi.signedValues; +eos=0;%end of stream +decCoeffs=[]; + +huffSec=huffSec-48; +streamIndex=1; +while(~eos)%end of stream + huffFailure=0; + wordbit=0; + r=1;%row indicator + b=huffSec(streamIndex+wordbit); + + %decoding tuple according to inverse matrix + while(1) + b=huffSec(streamIndex+wordbit); + wordbit=wordbit+1; + rOld=r; + r=h(rOld,b+1);%new index + if((h(r,1)==0)&(h(r,2)==0))%reached a leaf (found a valid word) + symbolIndex=h(r,3)-1;%The actual index begins from zero. + streamIndex=streamIndex+wordbit; + break; + end + end + %decoding nTuple magnitudes + if(signedValues) + base=2*maxAbsCodeVal+1; + nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); + nTupleDec=nTupleDec-maxAbsCodeVal; + else + base=maxAbsCodeVal+1; + nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); + nTupleSignBits=huffSec(streamIndex:streamIndex+nTupleSize-1); + nTupleSign=-sign(nTupleSignBits(:)'-0.5); + streamIndex=streamIndex+nTupleSize; + nTupleDec=nTupleDec.*nTupleSign; + end + escIndex=find(abs(nTupleDec)==16); + if((huffCodebook==11)&&(sum(escIndex))) + for i=1:length(escIndex) + b=huffSec(streamIndex); + N=0; + %reading the escape seq, counting N '1's + while(b) + N=N+1; + b=huffSec(streamIndex+N); + end%last bit read was a zero corresponding to esc_separator.. + streamIndex=streamIndex+N; + %reading the next N+4 bits + N4=N+4; + escape_word=huffSec(streamIndex+1:streamIndex+N4); + nTupleDec(escIndex(i))=2^N4+bin2dec(num2str(escape_word)); + streamIndex=streamIndex+N4+1; + end + nTupleDec(escIndex)=nTupleDec(escIndex).*nTupleSign(escIndex); + end + decCoeffs=[decCoeffs nTupleDec]; + if (streamIndex>length(huffSec)) + eos=1; + end + +end diff --git a/Level_3/demoAAC3.m b/Level_3/demoAAC3.m new file mode 100644 index 0000000..f1c2666 --- /dev/null +++ b/Level_3/demoAAC3.m @@ -0,0 +1,53 @@ +function [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded) +%Function that demonstrates usage of the level 3 code +% Usage [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded), where: +% Inputs +% - fNameIn is the filename and path of the file to encode +% - fNameOut is the filename and path of the wav file that will be +% written after decoding +% - frameAACoded is the filename and path of the mat file that will +% be written after encoding +% +% Output +% - SNR is the signal to noise ration computed after successively +% encoding and decoding the audio signal +% - bitrate is the bits per second +% - compression is the ratio of the bitrate before the encoding to +% the bitrate after it + + AACSeq3 = AACoder3(fNameIn, frameAACoded); + decodedAudio = iAACoder3(AACSeq3, fNameOut); + + [audioData, ~] = audioread(fNameIn); + +% figure() +% plot(audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) +% for frame = 1:length(AACSeq1) +% if strcmp(AACSeq1(frame).frameType, 'LSS') || strcmp(AACSeq1(frame).frameType, 'LPS') +% % Add lines +% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); +% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); +% % Set properties of lines +% set([h1 h2],'Color','y','LineWidth',2) +% % Add a patch +% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'y') +% elseif strcmp(AACSeq1(frame).frameType, 'ESH') +% % Add lines +% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); +% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); +% % Set properties of lines +% set([h1 h2],'Color','r','LineWidth',2) +% % Add a patch +% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'r') +% end +% end +% set(gca,'children',flipud(get(gca,'children'))) +% +% figure() +% plot(audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) + + snr(audioData(1025:length(decodedAudio)-1024, 1), audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) + snr(audioData(1025:length(decodedAudio)-1024, 2), audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) + SNR = 10*log10((sum(audioData(1025:length(decodedAudio)-1024, :)) .^ 2) ./ ... + (sum(audioData(1025:length(decodedAudio)-1024, :) - decodedAudio(1025:end-1024, :)) .^ 2)); +end diff --git a/Level_3/encodeHuff.m b/Level_3/encodeHuff.m new file mode 100644 index 0000000..c634664 --- /dev/null +++ b/Level_3/encodeHuff.m @@ -0,0 +1,194 @@ +function [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) +% ENCODEHUFF Huffman encoder stage +% +% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT) performs +% huffman coding for quantised (integer) values of a section coeffSec; +% huffLUT are the Huffman look-up tables to be loaded using loadLUT.m +% +% It returns huffSec, a string of '1' and '0' corresponding to the +% Huffman encoded stream, and huffCodebook, the number of the Huffman +% codebook used (see page 147 in w2203tfa.pdf and pages 82-94 in +% w2203tft.pdf). +% +% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) +% forces the codebook forcedCodebook to be used. + + if nargin == 2 + [huffSec, huffCodebook] = encodeHuff1(coeffSec, huffLUT); + else + huffSec = huffLUTCode1(huffLUT{forcedCodebook}, coeffSec); + huffCodebook = forcedCodebook; + end + +function [huffSec, huffCodebook]=encodeHuff1(coeffSec,huffLUT) +maxAbsVal=max(abs(coeffSec)); +ESC=0;%escape sequence used? +switch(maxAbsVal) + case 0 + huffCodebook=0; + [huffSec]=huffLUTCode0(); + case 1 + huffCodebook=[1 2]; +% signedValues=1; +% nTupleSize=4; +% maxAbsCodeVal=1; + [huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + [huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); + if(length(huffSec1)<=length(huffSec2)) + huffSec=huffSec1; + huffCodebook=huffCodebook(1); + else + huffSec=huffSec2; + huffCodebook=huffCodebook(2); + end + case 2 + huffCodebook=[3 4]; +% signedValues=0; +% nTupleSize=4; +% maxAbsCodeVal=2; + [huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + [huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); + if(length(huffSec1)<=length(huffSec2)) + huffSec=huffSec1; + huffCodebook=huffCodebook(1); + else + huffSec=huffSec2; + huffCodebook=huffCodebook(2); + end + case {3,4} + huffCodebook=[5 6]; + [huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + [huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); + if(length(huffSec1)<=length(huffSec2)) + huffSec=huffSec1; + huffCodebook=huffCodebook(1); + else + huffSec=huffSec2; + huffCodebook=huffCodebook(2); + end + case {5,6,7} + huffCodebook=[7 8]; + [huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + [huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); + if(length(huffSec1)<=length(huffSec2)) + huffSec=huffSec1; + huffCodebook=huffCodebook(1); + else + huffSec=huffSec2; + huffCodebook=huffCodebook(2); + end + case {8,9,10,11,12} + huffCodebook=[9 10]; + [huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + [huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); + if(length(huffSec1)<=length(huffSec2)) + huffSec=huffSec1; + huffCodebook=huffCodebook(1); + else + huffSec=huffSec2; + huffCodebook=huffCodebook(2); + end + case {13,14,15} + huffCodebook=11; + [huffSec]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); + otherwise + huffCodebook=11; + [huffSec]=huffLUTCodeESC(huffLUT{huffCodebook(1)}, coeffSec); +end + + + +function [huffSec]=huffLUTCode1(huffLUT, coeffSec) + LUT=huffLUT.LUT; + huffCodebook=huffLUT.codebook; + nTupleSize=huffLUT.nTupleSize; + maxAbsCodeVal=huffLUT.maxAbsCodeVal; + signedValues=huffLUT.signedValues; + numTuples=ceil(length(coeffSec)/nTupleSize); + if(signedValues) + coeffSec=coeffSec+maxAbsCodeVal; + base=2*maxAbsCodeVal+1; + else + base=maxAbsCodeVal+1; + end + coeffSecPad=zeros(1,numTuples*nTupleSize); + coeffSecPad(1:length(coeffSec))=coeffSec; + for i=1:numTuples + nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); + huffIndex=abs(nTuple)*(base.^[nTupleSize-1:-1:0])'; + hexHuff=LUT(huffIndex+1,3); + + hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex + huffSecLen=LUT(huffIndex+1,2); + if(signedValues) + huffSec{i}=dec2bin(hex2dec(hexHuff),huffSecLen); + else + huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))')];%appending sign + end + end + huffSec=strcat([huffSec{:}]); + +function [huffSec]=huffLUTCode0() + huffSec=''; + +function [huffSec]=huffLUTCodeESC(huffLUT, coeffSec) + LUT=huffLUT.LUT; + huffCodebook=huffLUT.codebook; + nTupleSize=huffLUT.nTupleSize; + maxAbsCodeVal=huffLUT.maxAbsCodeVal; + signedValues=huffLUT.signedValues; + + numTuples=ceil(length(coeffSec)/nTupleSize); + base=maxAbsCodeVal+1; + coeffSecPad=zeros(1,numTuples*nTupleSize); + coeffSecPad(1:length(coeffSec))=coeffSec; + + nTupleOffset=zeros(1,nTupleSize); + for i=1:numTuples + nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); + lnTuple=nTuple; + lnTuple(lnTuple==0)=eps; + N4=max([0 0; floor(log2(abs(lnTuple)))]); + N=max([0 0 ; N4-4]); + esc=abs(nTuple)>15; + + nTupleESC=nTuple; + nTupleESC(esc)=sign(nTupleESC(esc))*16;%Just keep the sixteens here (nTupleESC). nTuple contains the actual values + + huffIndex=abs(nTupleESC)*(base.^[nTupleSize-1:-1:0])'; + hexHuff=LUT(huffIndex+1,3); + hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex + huffSecLen=LUT(huffIndex+1,2); + + %Adding sufficient ones to the prefix. If N<=0 empty string created + escape_prefix1=''; + escape_prefix2=''; + escape_prefix1(1:N(1))='1'; + escape_prefix2(1:N(2))='1'; + + + %Calculating the escape words. Taking absolute values. Will add the + %sign later on + if esc(1) + escape_separator1='0'; + escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); + else + escape_separator1=''; + escape_word1=''; + end + if esc(2) + escape_separator2='0'; + escape_word2=dec2bin(abs(nTuple(2))-2^(N4(2)),N4(2)); + else + escape_separator2=''; + escape_word2=''; + end + + % escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); + + escSeq=[escape_prefix1, escape_separator1, escape_word1, escape_prefix2, escape_separator2, escape_word2]; + + %adding the sign bits and the escape sequence + huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))'), escSeq];%appending sign + end + huffSec=strcat([huffSec{:}]); diff --git a/Level_3/filterbank.m b/Level_3/filterbank.m new file mode 100644 index 0000000..56ab914 --- /dev/null +++ b/Level_3/filterbank.m @@ -0,0 +1,100 @@ +function frameF = filterbank(frameT, frameType, winType) +%Implementation of the Filter Bank step +% Usage frameF = filterbank(frameT, frameType, winType), where: +% Inputs +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - winType is the type of the window selected, can be one of "KBD", +% "SIN" +% +% Output +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 + + % Declares persistent windows variables and initializes if empty + persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; + if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... + isempty(sinWindowLong) || isempty(sinWindowShort) + kaiserLong = kaiser(1025, 6*pi); + kaiserSumLong = sum(kaiserLong); + kaiserShort = kaiser(129, 4*pi); + kaiserSumShort = sum(kaiserShort); + + kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); + kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); + kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); + + sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); + + kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); + kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); + kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); + + sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); + end + + % Initializes output array + frameF(1024, 2) = 0; + + % Applies appropriate window to the frame + if strcmp(frameType, 'OLS') + if strcmp(winType, 'KBD') + frameT = bsxfun(@times, frameT, kaiserWindowLong'); + elseif strcmp(winType, 'SIN') + frameT = bsxfun(@times, frameT, sinWindowLong'); + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'LSS') + if strcmp(winType, 'KBD') + frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); + frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); + frameT(1601:end, :) = 0; + elseif strcmp(winType, 'SIN') + frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); + frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); + frameT(1601:end, :) = 0; + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'LPS') + if strcmp(winType, 'KBD') + frameT(1:448, :) = 0; + frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); + frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); + elseif strcmp(winType, 'SIN') + frameT(1:448, :) = 0; + frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); + frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); + else + error('filterbank, l[20]: Unsupported window type input!') + end + + frameF = mdct4(frameT); + elseif strcmp(frameType, 'ESH') + for channel = 1:2 + % Splits the frame into sub-frames + [subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay'); + + if strcmp(winType, 'KBD') + subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]); + elseif strcmp(winType, 'SIN') + subFrames = subFrames .* repmat(sinWindowShort', [1 8]); + end + + for subFrameIndex = 1:8 + frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = ... + mdct4(subFrames(:, subFrameIndex)); + end + end + end +end diff --git a/Level_3/huffCodebookSF.mat b/Level_3/huffCodebookSF.mat new file mode 100644 index 0000000..3413e1e Binary files /dev/null and b/Level_3/huffCodebookSF.mat differ diff --git a/Level_3/huffCodebooks.mat b/Level_3/huffCodebooks.mat new file mode 100644 index 0000000..048a422 Binary files /dev/null and b/Level_3/huffCodebooks.mat differ diff --git a/Level_3/iAACoder3.m b/Level_3/iAACoder3.m new file mode 100644 index 0000000..972c557 --- /dev/null +++ b/Level_3/iAACoder3.m @@ -0,0 +1,73 @@ +function x = iAACoder3(AACSeq3, fNameOut) +%Implementation of AAC decoder +% Usage x = iAACoder3(AACSeq3, fNameOut), where: +% Inputs +% - fNameOut is the filename and path of the file that will be +% written after decoding +% - AACSeq3 is an array of structs containing K structs, where K is +% the number of computed frames. Every struct of the array consists +% of: +% * a frameType, +% * a winType, +% * chl.TNScoeffs which are the quantized TNS coefficients of +% this frame's left channel, +% * chr.TNScoeffs which are the quantized TNS coefficients of +% this frame's right channel, +% * chl.T which are the psychoacoustic thresholds of this frame's +% left channel, +% * chr.T which are the psychoacoustic thresholds of this frame's +% right channel, +% * chl.G which are the quantized global gains of this frame's +% left channel, +% * chr.G which are the quantized global gains of this frame's +% right channel, +% * chl.sfc which is the Huffman encoded sfc sequence of this +% frame's left channel, +% * chr.sfc which is the Huffman encoded sfc sequence of this +% frame's right channel, +% * chl.stream which is the Huffman encoded quantized MDCT +% sequence of this frame's left channel, +% * chr.stream which is the Huffman encoded quantized MDCT +% sequence of this frame's right channel, +% * chl.codebook which is the Huffman codebook used for this +% frame's left channel +% * chr.codebook which is the Huffman codebook used for this +% frame's right channel +% +% Output +% - x is an array containing the decoded audio samples + + % Initializes an array to hold the decoded samples + decodedAudio(1024 * (length(AACSeq3) + 1), 2) = 0; + % Initializes an array to hold both audio channels + frameF(1024, 2) = 0; + + % Decodes audio file + huffLUT = loadLUT(); + for i = 0:length(AACSeq3) - 1 + currFrameStart = i * 1024 + 1; + currFrameStop = currFrameStart + 2047; + + SL = decodeHuff(AACSeq3(i + 1).chl.stream, ... + AACSeq3(i + 1).chl.codebook, huffLUT); + SR = decodeHuff(AACSeq3(i + 1).chr.stream, ... + AACSeq3(i + 1).chr.codebook, huffLUT); + + sfcL = decodeHuff(AACSeq3(i + 1).chl.sfc, 12, huffLUT); % TODO: maybe LUT(12); + sfcR = decodeHuff(AACSeq3(i + 1).chr.sfc, 12, huffLUT); + + frameF(:, 1) = iAACquantizer(SL, sfcL, AACSeq3(i + 1).chl.G, AACSeq3(i+1).frameType); + frameF(:, 2) = iAACquantizer(SR, sfcR, AACSeq3(i + 1).chr.G, AACSeq3(i+1).frameType); + + TNScoeffsL = AACSeq3(i + 1).chl.TNScoeffs; + TNScoeffsR = AACSeq3(i + 1).chr.TNScoeffs; + frameF(:, 1) = iTNS(frameF(:, 1), AACSeq3(i+1).frameType, TNScoeffsL); + frameF(:, 2) = iTNS(frameF(:, 2), AACSeq3(i+1).frameType, TNScoeffsR); + frameT = iFilterbank(frameF, AACSeq3(i+1).frameType, AACSeq3(i+1).winType); + + decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT; + end + + audiowrite(fNameOut, decodedAudio, 48000); + x = decodedAudio; +end diff --git a/Level_3/iAACquantizer.m b/Level_3/iAACquantizer.m new file mode 100644 index 0000000..9f24167 --- /dev/null +++ b/Level_3/iAACquantizer.m @@ -0,0 +1,24 @@ +function frameF = iAACquantizer(S, sfc, G, frameType) +%Implementation of the inverse Quantizer +% Usage frameF = iAACquantizer(S, sfc, G, frameType), where: +% Inputs +% - S are the MDCT quantization symbols of one audio channel stored +% in a vector of length 1024 +% - sfc are the scalefactors per band stored in an array of +% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 +% otherwise, where NB is the number of bands +% - G is the global gain stored in an array of dimensions 1X8 for +% EIGHT_SHORT_SEQUENCE frames and a single value otherwise +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% +% Output +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing only one of the audio channels stored in +% a vector of length 1024 + + +end + diff --git a/Level_3/iFilterbank.m b/Level_3/iFilterbank.m new file mode 100644 index 0000000..683be02 --- /dev/null +++ b/Level_3/iFilterbank.m @@ -0,0 +1,99 @@ +function frameT = iFilterbank(frameF, frameType, winType) +%Implementation of the Inverse Filter Bank step +% Usage frameT = iFilterbank(frameF, frameType, winType), where: +% Inputs +% - frameF is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - winType is the type of the window selected, can be one of "KBD", +% "SIN" +% +% Output +% - frameT is a frame in the time domain, containing both channels of +% the audio stored in an array of dimensions 2048X2 + + % Declares persistent windows variables and initializes if empty + persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; + if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... + isempty(sinWindowLong) || isempty(sinWindowShort) + kaiserLong = kaiser(1025, 6*pi); + kaiserSumLong = sum(kaiserLong); + kaiserShort = kaiser(129, 4*pi); + kaiserSumShort = sum(kaiserShort); + + kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); + kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); + kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); + + sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); + + kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); + kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); + kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); + + sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); + end + + % Initializes output array + frameT(2048, 2) = 0; + + % Applies appropriate window to the frame + if strcmp(frameType, 'OLS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT = bsxfun(@times, frameT, kaiserWindowLong'); + elseif strcmp(winType, 'SIN') + frameT = bsxfun(@times, frameT, sinWindowLong'); + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'LSS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); + frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); + frameT(1601:end, :) = 0; + elseif strcmp(winType, 'SIN') + frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); + frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); + frameT(1601:end, :) = 0; + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'LPS') + frameT = imdct4(frameF); + + if strcmp(winType, 'KBD') + frameT(1:448, :) = 0; + frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); + frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); + elseif strcmp(winType, 'SIN') + frameT(1:448, :) = 0; + frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); + frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); + else + error('filterbank, l[20]: Unsupported window type input!') + end + elseif strcmp(frameType, 'ESH') + for channel=1:2 + for subFrameIndex = 1:8 + subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel)); + + if strcmp(winType, 'KBD') + subFrame = subFrame .* kaiserWindowShort'; + elseif strcmp(winType, 'SIN') + subFrame = subFrame .* sinWindowShort'; + end + + frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) = ... + frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) + subFrame; + end + end + end +end diff --git a/Level_3/iTNS.m b/Level_3/iTNS.m new file mode 100644 index 0000000..d14426c --- /dev/null +++ b/Level_3/iTNS.m @@ -0,0 +1,38 @@ +function frameFout = iTNS(frameFin, frameType, TNScoeffs) +%Implementation of the reverse TNS step +% Usage frameFout = iTNS(frameFin, frameType, TNScoeffs), where: +% Inputs +% - frameFin is the frame in the frequency domain, in MDCT coefficients +% representation containing both channels of the audio stored in an +% array of dimensions 1024X2 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - TNScoeffs is the quantized TNS coefficients array of dimensions +% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise +% +% Output +% - frameFout is the frame in the frequency domain after Temporal Noise +% Shaping, in MDCT coefficients representation containing both channels +% of the audio stored in an array of dimensions 1024X2 + + % Declares persistent variable holding the TNS tables and initializes if empty + persistent TNSTables; + if isempty(TNSTables) + TNSTables = load('TableB219.mat'); + end + + if ~strcmp(frameType, 'ESH') + % Inverses MDCT coefficients filtering + frameFout = filter(1, [1 TNScoeffs], frameFin); + else + for subFrameIndex = 1:8 + subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); + + % Inverses MDCT coefficients filtering + frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... + filter(1, [1 TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4)], subFrame); + end + end +end diff --git a/Level_3/imdct4.m b/Level_3/imdct4.m new file mode 100644 index 0000000..e81887c --- /dev/null +++ b/Level_3/imdct4.m @@ -0,0 +1,61 @@ + +function y = imdct4(x) +% IMDCT4 Calculates the Modified Discrete Cosine Transform +% y = imdct4(x) +% +% x: input signal (can be either a column or frame per column) +% y: IMDCT of x +% +% Vectorize ! ! ! + +% ------- imdct4.m ----------------------------------------- +% Marios Athineos, marios@ee.columbia.edu +% http://www.ee.columbia.edu/~marios/ +% Copyright (c) 2002 by Columbia University. +% All rights reserved. +% ---------------------------------------------------------- + +[flen,fnum] = size(x); +% Make column if it's a single row +if (flen==1) + x = x(:); + flen = fnum; + fnum = 1; +end + +% We need these for furmulas below +N = flen; +M = N/2; +twoN = 2*N; +sqrtN = sqrt(twoN); + +% We need this twice so keep it around +t = (0:(M-1)).'; +w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN))); + +% Pre-twiddle +t = (0:(M-1)).'; +c = x(2*t+1,:) + j*x(N-1-2*t+1,:); +c = (0.5*w)*c; + +% FFT for N/2 points only !!! +c = fft(c,M); + +% Post-twiddle +c = ((8/sqrtN)*w)*c; + +% Preallocate rotation matrix +rot = zeros(twoN,fnum); + +% Sort +t = (0:(M-1)).'; +rot(2*t+1,:) = real(c(t+1,:)); +rot(N+2*t+1,:) = imag(c(t+1,:)); +t = (1:2:(twoN-1)).'; +rot(t+1,:) = -rot(twoN-1-t+1,:); + +% Shift +t = (0:(3*M-1)).'; +y(t+1,:) = rot(t+M+1,:); +t = (3*M:(twoN-1)).'; +y(t+1,:) = -rot(t-3*M+1,:); \ No newline at end of file diff --git a/Level_3/loadLUT.m b/Level_3/loadLUT.m new file mode 100644 index 0000000..0420b09 --- /dev/null +++ b/Level_3/loadLUT.m @@ -0,0 +1,50 @@ +function huffLUT = loadLUT() + load huffCodebooks + load huffCodebookSF + huffCodebooks{end + 1} = huffCodebookSF; + for i=1:12 + h=huffCodebooks{i}(:,3); + hlength=huffCodebooks{i}(:,2); + for j=1:length(h) + hbin{j}=dec2bin(h(j),hlength(j)); + end + invTable{i}=vlcTable(hbin); + clear hbin; + end + huffLUT{1}=struct('LUT',huffCodebooks{1},'invTable',invTable{1},'codebook',1,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); + huffLUT{2}=struct('LUT',huffCodebooks{2},'invTable',invTable{2},'codebook',2,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); + huffLUT{3}=struct('LUT',huffCodebooks{3},'invTable',invTable{3},'codebook',3,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); + huffLUT{4}=struct('LUT',huffCodebooks{4},'invTable',invTable{4},'codebook',4,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); + huffLUT{5}=struct('LUT',huffCodebooks{5},'invTable',invTable{5},'codebook',5,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); + huffLUT{6}=struct('LUT',huffCodebooks{6},'invTable',invTable{6},'codebook',6,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); + huffLUT{7}=struct('LUT',huffCodebooks{7},'invTable',invTable{7},'codebook',7,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); + huffLUT{8}=struct('LUT',huffCodebooks{8},'invTable',invTable{8},'codebook',8,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); + huffLUT{9}=struct('LUT',huffCodebooks{9},'invTable',invTable{9},'codebook',9,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); + huffLUT{10}=struct('LUT',huffCodebooks{10},'invTable',invTable{10},'codebook',10,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); + huffLUT{11}=struct('LUT',huffCodebooks{11},'invTable',invTable{11},'codebook',11,'nTupleSize',2,'maxAbsCodeVal',16,'signedValues',0); + huffLUT{12}=struct('LUT',huffCodebooks{12},'invTable',invTable{12},'codebook',12,'nTupleSize',1,'maxAbsCodeVal',60,'signedValues',1); + +function [h]=vlcTable(codeArray) +% Generates the inverse variable length coding tree, stored in a table. It +% is used for decoding. +% +% codeArray: the input matrix, in the form of a cell array, with each cell +% containing codewords represented in string format (array of chars) + h=zeros(1,3); + for codeIndex=1:length(codeArray) + word=str2num([codeArray{codeIndex}]')'; + hIndex=1; + hLength=size(h,1); + for i=1:length(word) + k=word(i)+1; + if(~h(hIndex,k)) + h(hIndex,k)=hLength+1; + hIndex=hLength+1; + h(hIndex,1:2)=zeros(1,2); + hLength=hLength+1; + else + hIndex=h(hIndex,k); + end + end + h(hIndex,3)=codeIndex; + end \ No newline at end of file diff --git a/Level_3/mdct4.m b/Level_3/mdct4.m new file mode 100644 index 0000000..37c8f11 --- /dev/null +++ b/Level_3/mdct4.m @@ -0,0 +1,75 @@ + +function y = mdct4(x) +% MDCT4 Calculates the Modified Discrete Cosine Transform +% y = mdct4(x) +% +% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with +% 50% overlap for perfect TDAC reconstruction. +% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full +% matrix (N) of coefs is: yf = [y;-flipud(y)]; +% +% x: input signal (can be either a column or frame per column) +% length of x must be a integer multiple of 4 (each frame) +% y: MDCT of x (coefs are divided by sqrt(N)) +% +% Vectorize ! ! ! + +% ------- mdct4.m ------------------------------------------ +% Marios Athineos, marios@ee.columbia.edu +% http://www.ee.columbia.edu/~marios/ +% Copyright (c) 2002 by Columbia University. +% All rights reserved. +% ---------------------------------------------------------- + +[flen,fnum] = size(x); +% Make column if it's a single row +if (flen==1) + x = x(:); + flen = fnum; + fnum = 1; +end +% Make sure length is multiple of 4 +if (rem(flen,4)~=0) + error('MDCT4 defined for lengths multiple of four.'); +end + +% We need these for furmulas below +N = flen; % Length of window +M = N/2; % Number of coefficients +N4 = N/4; % Simplify the way eqs look +sqrtN = sqrt(N); + +% Preallocate rotation matrix +% It would be nice to be able to do it in-place but we cannot +% cause of the prerotation. +rot = zeros(flen,fnum); + +% Shift +t = (0:(N4-1)).'; +rot(t+1,:) = -x(t+3*N4+1,:); +t = (N4:(N-1)).'; +rot(t+1,:) = x(t-N4+1,:); +clear x; + +% We need this twice so keep it around +t = (0:(N4-1)).'; +w = diag(sparse(exp(-j*2*pi*(t+1/8)/N))); + +% Pre-twiddle +t = (0:(N4-1)).'; +c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))... + -j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:)); +% This is a really cool Matlab trick ;) +c = 0.5*w*c; +clear rot; + +% FFT for N/4 points only !!! +c = fft(c,N4); + +% Post-twiddle +c = (2/sqrtN)*w*c; + +% Sort +t = (0:(N4-1)).'; +y(2*t+1,:) = real(c(t+1,:)); +y(M-1-2*t+1,:) = -imag(c(t+1,:)); \ No newline at end of file diff --git a/Level_3/psycho.m b/Level_3/psycho.m new file mode 100644 index 0000000..24e79e5 --- /dev/null +++ b/Level_3/psycho.m @@ -0,0 +1,23 @@ +function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) +%Implementation of Psychoacoustic Model +% Usage SMR = psycho(frameT, frameType, frameTprev1, frameTprev2), where: +% Inputs +% - frameT is a frame in the time domain, containing only one of the +% audio channels stored in a vector of length 2048 +% - frameType is the type of the current frame in string +% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" +% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" +% (LONG_STOP_SEQUENCE) +% - frameTprev1 is the previous frame in the time domain, containing +% only one of the audio channels stored in a vector of length 2048 +% - frameTprev2 is the frame before frameTprev1 in the time domain, +% containing only one of the audio channels stored in a vector of +% length 2048 +% +% Output +% - SMR is the signal to mask ratio array of dimensions 42X8 for +% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise + + +end +