Browse Source

Init level 3

master
Apostolos Fanakis 6 years ago
parent
commit
1a78458f95
  1. 123
      Level_3/AACoder3.m
  2. 26
      Level_3/AACquantizer.m
  3. BIN
      Level_3/LicorDeCalandraca.wav
  4. 78
      Level_3/SSC.m
  5. 119
      Level_3/TNS.m
  6. BIN
      Level_3/TableB219.mat
  7. 82
      Level_3/decodeHuff.m
  8. 53
      Level_3/demoAAC3.m
  9. 194
      Level_3/encodeHuff.m
  10. 100
      Level_3/filterbank.m
  11. BIN
      Level_3/huffCodebookSF.mat
  12. BIN
      Level_3/huffCodebooks.mat
  13. 73
      Level_3/iAACoder3.m
  14. 24
      Level_3/iAACquantizer.m
  15. 99
      Level_3/iFilterbank.m
  16. 38
      Level_3/iTNS.m
  17. 61
      Level_3/imdct4.m
  18. 50
      Level_3/loadLUT.m
  19. 75
      Level_3/mdct4.m
  20. 23
      Level_3/psycho.m

123
Level_3/AACoder3.m

@ -0,0 +1,123 @@
function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
%Implementation of AAC encoder
% Usage AACSeq3 = AACoder3(fNameIn, fnameAACoded), where:
% Inputs
% - fNameIn is the filename and path of the file to encode
% - frameAACoded is the filename and path of the mat file that will
% be written after encoding
%
% Output
% - AACSeq3 is an array of structs containing K structs, where K is
% the number of computed frames. Every struct of the array consists
% of:
% * a frameType,
% * a winType,
% * chl.TNScoeffs which are the quantized TNS coefficients of
% this frame's left channel,
% * chr.TNScoeffs which are the quantized TNS coefficients of
% this frame's right channel,
% * chl.T which are the psychoacoustic thresholds of this frame's
% left channel,
% * chr.T which are the psychoacoustic thresholds of this frame's
% right channel,
% * chl.G which are the quantized global gains of this frame's
% left channel,
% * chr.G which are the quantized global gains of this frame's
% right channel,
% * chl.sfc which is the Huffman encoded sfc sequence of this
% frame's left channel,
% * chr.sfc which is the Huffman encoded sfc sequence of this
% frame's right channel,
% * chl.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's left channel,
% * chr.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's right channel,
% * chl.codebook which is the Huffman codebook used for this
% frame's left channel
% * chr.codebook which is the Huffman codebook used for this
% frame's right channel
% Declares constant window type
WINDOW_TYPE = 'SIN';
% Reads the audio file
[originalAudioData, ~] = audioread(fNameIn);
% Splits the audio in frames and determines the type of each frame
frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS';
frameTypes{1} = 'OLS';
for i = 1:length(frameTypes) - 2
nextFrameStart = (i + 1) * 1024 + 1;
nextFrameStop = nextFrameStart + 2047;
frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i});
end
% Assigns a type to the last frame
if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS')
frameTypes{length(frameTypes)} = 'ESH';
elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH')
frameTypes{length(frameTypes)} = 'ESH';
else
frameTypes{length(frameTypes)} = 'OLS';
end
% Encodes audio file
huffLUT = loadLUT();
AACSeq3(length(frameTypes)) = struct;
for i = 0:length(frameTypes) - 1
currFrameStart = i * 1024 + 1;
currFrameStop = currFrameStart + 2047;
frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, WINDOW_TYPE);
[frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameTypes{i+1});
[frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameTypes{i+1});
if i < 2
% TODO: what happens on the first two frames?
else
prev1FrameStart = (i - 1) * 1024 + 1;
prev1FrameStop = prev1FrameStart + 2047;
prev2FrameStart = (i - 2) * 1024 + 1;
prev2FrameStop = prev1FrameStart + 2047;
SMR = psycho(...
originalAudioData(currFrameStart:currFrameStop, :), ...
frameTypes{i+1}, ...
originalAudioData(prev1FrameStart:prev1FrameStop, :), ...
originalAudioData(prev2FrameStart:prev2FrameStop, :));
[SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMR);
[SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMR);
end
[streamL, huffcodebookL] = encodeHuff(SL, huffLUT);
[streamR, huffcodebookR] = encodeHuff(SR, huffLUT);
[sfcL, ~] = encodeHuff(sfcL, huffLUT, 12);
[sfcR, ~] = encodeHuff(sfcR, huffLUT, 12);
AACSeq3(i + 1).frameType = frameTypes(i + 1);
AACSeq3(i + 1).winType = WINDOW_TYPE;
AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL;
AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR;
AACSeq3(i + 1).chl.T = what; % TODO: find dis shit
AACSeq3(i + 1).chr.T = what;
AACSeq3(i + 1).chl.G = GL;
AACSeq3(i + 1).chr.G = GR;
AACSeq3(i + 1).chl.sfc = sfcL;
AACSeq3(i + 1).chr.sfc = sfcR;
AACSeq3(i + 1).chl.stream = streamL;
AACSeq3(i + 1).chr.stream = streamR;
AACSeq3(i + 1).chl.codebook = huffcodebookL;
AACSeq3(i + 1).chr.codebook = huffcodebookR;
end
save(fnameAACoded,AACSeq3);
if false
[idx,label] = grp2idx(sort(frameTypes));
hist(idx,unique(idx));
set(gca,'xTickLabel',label)
sum(idx(:) == 1)
sum(idx(:) == 2)
sum(idx(:) == 3)
sum(idx(:) == 4)
end
end

26
Level_3/AACquantizer.m

@ -0,0 +1,26 @@
function [S, sfc, G] = AACquantizer(frameF, frameType, SMR)
%Implementation of Quantizer
% Usage [S, sfc, G] = AACquantizer(frameF, frameType, SMR), where:
% Inputs
% - frameF is the frame in the frequency domain, in MDCT coefficients
% representation containing only one of the audio channels stored in
% a vector of length 1024
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% - SMR is the signal to mask ratio array of dimensions 42X8 for
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise
%
% Output
% - S are the MDCT quantization symbols of one audio channel stored
% in a vector of length 1024
% - sfc are the scalefactors per band stored in an array of
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1
% otherwise, where NB is the number of bands
% - G is the global gain stored in an array of dimensions 1X8 for
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise
end

BIN
Level_3/LicorDeCalandraca.wav

Binary file not shown.

78
Level_3/SSC.m

@ -0,0 +1,78 @@
function frameType = SSC(~, nextFrameT, prevFrameType)
%Implementation of the Sequence Segmentation Control step
% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where:
% Inputs
% - frameT is a frame in the time domain, containing both channels of
% the audio stored in an array of dimensions 2048X2
% - nextFrameT is the next frame in the time domain, containing both
% channels of the audio stored in an array of dimensions 2048X2
% - prevFrameType is the type of the previous frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
%
% Output
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% Examines the cases where the determination of the type of the next
% frame isn't needed for
if strcmp(prevFrameType, 'LSS')
frameType = 'ESH';
return;
elseif strcmp(prevFrameType, 'LPS')
frameType = 'OLS';
return;
end
% Determines the type of the next frame
% Filters frame
nextFrameT = filter([0.7548 -0.7548], [1 -0.5095], nextFrameT, [], 1);
channelFrameType = {'nan', 'nan'};
for channel = 1:2
% Calculates sub-frame energy estimation
[subFrames, ~] = buffer(nextFrameT(577:end - 448, channel), 128, 0, 'nodelay');
energyEstimations = sum(subFrames .^ 2, 1);
% Calculates the ratio of the sub-frame energy to the average energy of
% the previous sub-frames
energyRatios = movmean(energyEstimations, [8 0]);
energyRatios = energyEstimations ./ [energyEstimations(1) energyRatios(1:end-1)];
if ~isempty(find(energyEstimations > 10^(-3), 1)) && ...
~isempty(find(energyRatios(energyEstimations > 10^(-3)) > 10, 1))
% Next frame is ESH
if strcmp(prevFrameType, 'ESH')
% This frame of this channel is an EIGHT_SHORT_SEQUENCE type
% frame. This means the frames of both channels will be encoded
% as EIGHT_SHORT_SEQUENCE type frames.
frameType = 'ESH';
return;
elseif strcmp(prevFrameType, 'OLS')
channelFrameType{channel} = 'LSS';
end
else
if strcmp(prevFrameType, 'ESH')
channelFrameType{channel} = 'LPS';
elseif strcmp(prevFrameType, 'OLS')
channelFrameType{channel} = 'OLS';
end
end
end
% Joins decision for both channels
if strcmp(channelFrameType{1}, 'nan') || strcmp(channelFrameType{2}, 'nan')
error('SSC, l[73]: Internal error occurred!')
end
if strcmp(channelFrameType{1}, 'OLS')
frameType = channelFrameType{2};
elseif strcmp(channelFrameType{2}, 'OLS')
frameType = channelFrameType{1};
elseif strcmp(channelFrameType{1}, channelFrameType{2})
frameType = channelFrameType{1};
else
frameType = 'ESH';
end
end

119
Level_3/TNS.m

@ -0,0 +1,119 @@
function [frameFout, TNScoeffs] = TNS(frameFin, frameType)
%Implementation of the TNS step
% Usage [frameFout, TNScoeffs] = TNS(frameFin, frameType), where:
% Inputs
% - frameFin is the frame in the frequency domain, in MDCT coefficients
% representation containing both channels of the audio stored in an
% array of dimensions 1024X2
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
%
% Output
% - frameFout is the frame in the frequency domain after Temporal Noise
% Shaping, in MDCT coefficients representation containing both channels
% of the audio stored in an array of dimensions 1024X2
% - TNScoeffs is the quantized TNS coefficients array of dimensions
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise
% Declares constant numbers of bands for long and short windows
LONG_WINDOW_NUMBER_OF_BANDS = 69;
SHORT_WINDOW_NUMBER_OF_BANDS = 42;
% Declares constant order of the linear prediction filter
LPF_ORDER = 4;
% Declares constant coefficients' resolution, in the form of number of
% decimal digits used
COEF_RES = 1;
% Declares persistent variable holding the TNS tables and initializes if empty
persistent TNSTables;
if isempty(TNSTables)
TNSTables = load('TableB219.mat');
end
if ~strcmp(frameType, 'ESH')
% Calculates the energy per band
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0;
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS - 1
bandEnergy(band) = sumsqr(frameFin(TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band + 1, 2)));
end
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = sumsqr(frameFin( ...
TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 2) + 1:end));
% Calculates the normalization factors
bandIndices = quantiz(0:1023, TNSTables.B219a(:, 2) - 1);
normalizationFactor = sqrt(bandEnergy(bandIndices));
% Smooths normalization factors
for normIndex = 1023:-1:1
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2;
end
for normIndex = 2:1024
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2;
end
% Normalizes MDCT coefficients according to each band energy
normalizedFrameFin = frameFin ./ normalizationFactor';
% Calculates the linear prediction coefficients
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER);
% Quantizes these coefficients
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES);
% Filters MDCT coefficients
if ~isstable(1, quantizedLinPredCoeff)
error('TNS, l[79]: Inverse filter not stable!');
else
TNScoeffs = quantizedLinPredCoeff(2:end);
frameFout = filter(quantizedLinPredCoeff, 1, frameFin);
end
else
% Initializes output vectors
TNScoeffs(LPF_ORDER * 8) = 0;
frameFout(1024) = 0;
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = 0;
for subFrameIndex = 1:8
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128);
% Calculates the energy per band
for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS - 1
bandEnergy(band) = sumsqr(subFrame(TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band + 1, 2)));
end
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = sumsqr(subFrame( ...
TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 2) + 1:end));
% Calculates the normalization factors
bandIndices = quantiz(0:127, TNSTables.B219b(:, 2) - 1);
normalizationFactor = sqrt(bandEnergy(bandIndices));
% Smooths normalization factors
for normIndex = 127:-1:1
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2;
end
for normIndex = 2:128
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2;
end
% Normalizes MDCT coefficients according to each band energy
normalizedFrameFin = subFrame ./ normalizationFactor';
% Calculates the linear prediction coefficients
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER);
% Quantizes these coefficients
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES);
% Filters MDCT coefficients
if ~isstable(1, quantizedLinPredCoeff)
error('TNS, l[79]: Inverse filter not stable!');
else
TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4) = quantizedLinPredCoeff(2:end);
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ...
filter(quantizedLinPredCoeff, 1, subFrame);
end
end
end
end

BIN
Level_3/TableB219.mat

Binary file not shown.

82
Level_3/decodeHuff.m

@ -0,0 +1,82 @@
function decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT)
% DECODEHUFF Huffman decoder stage
%
% decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) performs huffman
% decoding, where huffSec is a string of '1' and '0' corresponding to the
% Huffman encoded stream, and huffCodebook is the index (0 to 12) of the
% codebook used, as outputted by encodeHuff. huffLUT is the Huffman
% look-up tables to be loaded using loadLUT.m. The output decCoeffs is
% the decoded quantised (integer) values.
%
% CAUTION: due to zero padding the length of decCoeffs may be larger than
% the length of the encoded sequence. Simply ignore values (they should
% be equal to zero) that are outside the index range
% [1:length(encoded sequence)].
huffLUTi = huffLUT{huffCodebook};
h=huffLUTi.invTable;
huffCodebook=huffLUTi.codebook;
nTupleSize=huffLUTi.nTupleSize;
maxAbsCodeVal=huffLUTi.maxAbsCodeVal;
signedValues=huffLUTi.signedValues;
eos=0;%end of stream
decCoeffs=[];
huffSec=huffSec-48;
streamIndex=1;
while(~eos)%end of stream
huffFailure=0;
wordbit=0;
r=1;%row indicator
b=huffSec(streamIndex+wordbit);
%decoding tuple according to inverse matrix
while(1)
b=huffSec(streamIndex+wordbit);
wordbit=wordbit+1;
rOld=r;
r=h(rOld,b+1);%new index
if((h(r,1)==0)&(h(r,2)==0))%reached a leaf (found a valid word)
symbolIndex=h(r,3)-1;%The actual index begins from zero.
streamIndex=streamIndex+wordbit;
break;
end
end
%decoding nTuple magnitudes
if(signedValues)
base=2*maxAbsCodeVal+1;
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base);
nTupleDec=nTupleDec-maxAbsCodeVal;
else
base=maxAbsCodeVal+1;
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base);
nTupleSignBits=huffSec(streamIndex:streamIndex+nTupleSize-1);
nTupleSign=-sign(nTupleSignBits(:)'-0.5);
streamIndex=streamIndex+nTupleSize;
nTupleDec=nTupleDec.*nTupleSign;
end
escIndex=find(abs(nTupleDec)==16);
if((huffCodebook==11)&&(sum(escIndex)))
for i=1:length(escIndex)
b=huffSec(streamIndex);
N=0;
%reading the escape seq, counting N '1's
while(b)
N=N+1;
b=huffSec(streamIndex+N);
end%last bit read was a zero corresponding to esc_separator..
streamIndex=streamIndex+N;
%reading the next N+4 bits
N4=N+4;
escape_word=huffSec(streamIndex+1:streamIndex+N4);
nTupleDec(escIndex(i))=2^N4+bin2dec(num2str(escape_word));
streamIndex=streamIndex+N4+1;
end
nTupleDec(escIndex)=nTupleDec(escIndex).*nTupleSign(escIndex);
end
decCoeffs=[decCoeffs nTupleDec];
if (streamIndex>length(huffSec))
eos=1;
end
end

53
Level_3/demoAAC3.m

@ -0,0 +1,53 @@
function [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded)
%Function that demonstrates usage of the level 3 code
% Usage [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded), where:
% Inputs
% - fNameIn is the filename and path of the file to encode
% - fNameOut is the filename and path of the wav file that will be
% written after decoding
% - frameAACoded is the filename and path of the mat file that will
% be written after encoding
%
% Output
% - SNR is the signal to noise ration computed after successively
% encoding and decoding the audio signal
% - bitrate is the bits per second
% - compression is the ratio of the bitrate before the encoding to
% the bitrate after it
AACSeq3 = AACoder3(fNameIn, frameAACoded);
decodedAudio = iAACoder3(AACSeq3, fNameOut);
[audioData, ~] = audioread(fNameIn);
% figure()
% plot(audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1))
% for frame = 1:length(AACSeq1)
% if strcmp(AACSeq1(frame).frameType, 'LSS') || strcmp(AACSeq1(frame).frameType, 'LPS')
% % Add lines
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]);
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]);
% % Set properties of lines
% set([h1 h2],'Color','y','LineWidth',2)
% % Add a patch
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'y')
% elseif strcmp(AACSeq1(frame).frameType, 'ESH')
% % Add lines
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]);
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]);
% % Set properties of lines
% set([h1 h2],'Color','r','LineWidth',2)
% % Add a patch
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'r')
% end
% end
% set(gca,'children',flipud(get(gca,'children')))
%
% figure()
% plot(audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2))
snr(audioData(1025:length(decodedAudio)-1024, 1), audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1))
snr(audioData(1025:length(decodedAudio)-1024, 2), audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2))
SNR = 10*log10((sum(audioData(1025:length(decodedAudio)-1024, :)) .^ 2) ./ ...
(sum(audioData(1025:length(decodedAudio)-1024, :) - decodedAudio(1025:end-1024, :)) .^ 2));
end

194
Level_3/encodeHuff.m

@ -0,0 +1,194 @@
function [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook)
% ENCODEHUFF Huffman encoder stage
%
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT) performs
% huffman coding for quantised (integer) values of a section coeffSec;
% huffLUT are the Huffman look-up tables to be loaded using loadLUT.m
%
% It returns huffSec, a string of '1' and '0' corresponding to the
% Huffman encoded stream, and huffCodebook, the number of the Huffman
% codebook used (see page 147 in w2203tfa.pdf and pages 82-94 in
% w2203tft.pdf).
%
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook)
% forces the codebook forcedCodebook to be used.
if nargin == 2
[huffSec, huffCodebook] = encodeHuff1(coeffSec, huffLUT);
else
huffSec = huffLUTCode1(huffLUT{forcedCodebook}, coeffSec);
huffCodebook = forcedCodebook;
end
function [huffSec, huffCodebook]=encodeHuff1(coeffSec,huffLUT)
maxAbsVal=max(abs(coeffSec));
ESC=0;%escape sequence used?
switch(maxAbsVal)
case 0
huffCodebook=0;
[huffSec]=huffLUTCode0();
case 1
huffCodebook=[1 2];
% signedValues=1;
% nTupleSize=4;
% maxAbsCodeVal=1;
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec);
if(length(huffSec1)<=length(huffSec2))
huffSec=huffSec1;
huffCodebook=huffCodebook(1);
else
huffSec=huffSec2;
huffCodebook=huffCodebook(2);
end
case 2
huffCodebook=[3 4];
% signedValues=0;
% nTupleSize=4;
% maxAbsCodeVal=2;
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec);
if(length(huffSec1)<=length(huffSec2))
huffSec=huffSec1;
huffCodebook=huffCodebook(1);
else
huffSec=huffSec2;
huffCodebook=huffCodebook(2);
end
case {3,4}
huffCodebook=[5 6];
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec);
if(length(huffSec1)<=length(huffSec2))
huffSec=huffSec1;
huffCodebook=huffCodebook(1);
else
huffSec=huffSec2;
huffCodebook=huffCodebook(2);
end
case {5,6,7}
huffCodebook=[7 8];
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec);
if(length(huffSec1)<=length(huffSec2))
huffSec=huffSec1;
huffCodebook=huffCodebook(1);
else
huffSec=huffSec2;
huffCodebook=huffCodebook(2);
end
case {8,9,10,11,12}
huffCodebook=[9 10];
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec);
if(length(huffSec1)<=length(huffSec2))
huffSec=huffSec1;
huffCodebook=huffCodebook(1);
else
huffSec=huffSec2;
huffCodebook=huffCodebook(2);
end
case {13,14,15}
huffCodebook=11;
[huffSec]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec);
otherwise
huffCodebook=11;
[huffSec]=huffLUTCodeESC(huffLUT{huffCodebook(1)}, coeffSec);
end
function [huffSec]=huffLUTCode1(huffLUT, coeffSec)
LUT=huffLUT.LUT;
huffCodebook=huffLUT.codebook;
nTupleSize=huffLUT.nTupleSize;
maxAbsCodeVal=huffLUT.maxAbsCodeVal;
signedValues=huffLUT.signedValues;
numTuples=ceil(length(coeffSec)/nTupleSize);
if(signedValues)
coeffSec=coeffSec+maxAbsCodeVal;
base=2*maxAbsCodeVal+1;
else
base=maxAbsCodeVal+1;
end
coeffSecPad=zeros(1,numTuples*nTupleSize);
coeffSecPad(1:length(coeffSec))=coeffSec;
for i=1:numTuples
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize);
huffIndex=abs(nTuple)*(base.^[nTupleSize-1:-1:0])';
hexHuff=LUT(huffIndex+1,3);
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex
huffSecLen=LUT(huffIndex+1,2);
if(signedValues)
huffSec{i}=dec2bin(hex2dec(hexHuff),huffSecLen);
else
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))')];%appending sign
end
end
huffSec=strcat([huffSec{:}]);
function [huffSec]=huffLUTCode0()
huffSec='';
function [huffSec]=huffLUTCodeESC(huffLUT, coeffSec)
LUT=huffLUT.LUT;
huffCodebook=huffLUT.codebook;
nTupleSize=huffLUT.nTupleSize;
maxAbsCodeVal=huffLUT.maxAbsCodeVal;
signedValues=huffLUT.signedValues;
numTuples=ceil(length(coeffSec)/nTupleSize);
base=maxAbsCodeVal+1;
coeffSecPad=zeros(1,numTuples*nTupleSize);
coeffSecPad(1:length(coeffSec))=coeffSec;
nTupleOffset=zeros(1,nTupleSize);
for i=1:numTuples
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize);
lnTuple=nTuple;
lnTuple(lnTuple==0)=eps;
N4=max([0 0; floor(log2(abs(lnTuple)))]);
N=max([0 0 ; N4-4]);
esc=abs(nTuple)>15;
nTupleESC=nTuple;
nTupleESC(esc)=sign(nTupleESC(esc))*16;%Just keep the sixteens here (nTupleESC). nTuple contains the actual values
huffIndex=abs(nTupleESC)*(base.^[nTupleSize-1:-1:0])';
hexHuff=LUT(huffIndex+1,3);
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex
huffSecLen=LUT(huffIndex+1,2);
%Adding sufficient ones to the prefix. If N<=0 empty string created
escape_prefix1='';
escape_prefix2='';
escape_prefix1(1:N(1))='1';
escape_prefix2(1:N(2))='1';
%Calculating the escape words. Taking absolute values. Will add the
%sign later on
if esc(1)
escape_separator1='0';
escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1));
else
escape_separator1='';
escape_word1='';
end
if esc(2)
escape_separator2='0';
escape_word2=dec2bin(abs(nTuple(2))-2^(N4(2)),N4(2));
else
escape_separator2='';
escape_word2='';
end
% escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1));
escSeq=[escape_prefix1, escape_separator1, escape_word1, escape_prefix2, escape_separator2, escape_word2];
%adding the sign bits and the escape sequence
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))'), escSeq];%appending sign
end
huffSec=strcat([huffSec{:}]);

100
Level_3/filterbank.m

@ -0,0 +1,100 @@
function frameF = filterbank(frameT, frameType, winType)
%Implementation of the Filter Bank step
% Usage frameF = filterbank(frameT, frameType, winType), where:
% Inputs
% - frameT is a frame in the time domain, containing both channels of
% the audio stored in an array of dimensions 2048X2
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% - winType is the type of the window selected, can be one of "KBD",
% "SIN"
%
% Output
% - frameF is the frame in the frequency domain, in MDCT coefficients
% representation containing both channels of the audio stored in an
% array of dimensions 1024X2
% Declares persistent windows variables and initializes if empty
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort;
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ...
isempty(sinWindowLong) || isempty(sinWindowShort)
kaiserLong = kaiser(1025, 6*pi);
kaiserSumLong = sum(kaiserLong);
kaiserShort = kaiser(129, 4*pi);
kaiserSumShort = sum(kaiserShort);
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]);
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]);
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong);
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048);
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]);
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]);
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort);
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256);
end
% Initializes output array
frameF(1024, 2) = 0;
% Applies appropriate window to the frame
if strcmp(frameType, 'OLS')
if strcmp(winType, 'KBD')
frameT = bsxfun(@times, frameT, kaiserWindowLong');
elseif strcmp(winType, 'SIN')
frameT = bsxfun(@times, frameT, sinWindowLong');
else
error('filterbank, l[20]: Unsupported window type input!')
end
frameF = mdct4(frameT);
elseif strcmp(frameType, 'LSS')
if strcmp(winType, 'KBD')
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)');
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)');
frameT(1601:end, :) = 0;
elseif strcmp(winType, 'SIN')
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)');
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)');
frameT(1601:end, :) = 0;
else
error('filterbank, l[20]: Unsupported window type input!')
end
frameF = mdct4(frameT);
elseif strcmp(frameType, 'LPS')
if strcmp(winType, 'KBD')
frameT(1:448, :) = 0;
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)');
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)');
elseif strcmp(winType, 'SIN')
frameT(1:448, :) = 0;
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)');
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)');
else
error('filterbank, l[20]: Unsupported window type input!')
end
frameF = mdct4(frameT);
elseif strcmp(frameType, 'ESH')
for channel = 1:2
% Splits the frame into sub-frames
[subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay');
if strcmp(winType, 'KBD')
subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]);
elseif strcmp(winType, 'SIN')
subFrames = subFrames .* repmat(sinWindowShort', [1 8]);
end
for subFrameIndex = 1:8
frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = ...
mdct4(subFrames(:, subFrameIndex));
end
end
end
end

BIN
Level_3/huffCodebookSF.mat

Binary file not shown.

BIN
Level_3/huffCodebooks.mat

Binary file not shown.

73
Level_3/iAACoder3.m

@ -0,0 +1,73 @@
function x = iAACoder3(AACSeq3, fNameOut)
%Implementation of AAC decoder
% Usage x = iAACoder3(AACSeq3, fNameOut), where:
% Inputs
% - fNameOut is the filename and path of the file that will be
% written after decoding
% - AACSeq3 is an array of structs containing K structs, where K is
% the number of computed frames. Every struct of the array consists
% of:
% * a frameType,
% * a winType,
% * chl.TNScoeffs which are the quantized TNS coefficients of
% this frame's left channel,
% * chr.TNScoeffs which are the quantized TNS coefficients of
% this frame's right channel,
% * chl.T which are the psychoacoustic thresholds of this frame's
% left channel,
% * chr.T which are the psychoacoustic thresholds of this frame's
% right channel,
% * chl.G which are the quantized global gains of this frame's
% left channel,
% * chr.G which are the quantized global gains of this frame's
% right channel,
% * chl.sfc which is the Huffman encoded sfc sequence of this
% frame's left channel,
% * chr.sfc which is the Huffman encoded sfc sequence of this
% frame's right channel,
% * chl.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's left channel,
% * chr.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's right channel,
% * chl.codebook which is the Huffman codebook used for this
% frame's left channel
% * chr.codebook which is the Huffman codebook used for this
% frame's right channel
%
% Output
% - x is an array containing the decoded audio samples
% Initializes an array to hold the decoded samples
decodedAudio(1024 * (length(AACSeq3) + 1), 2) = 0;
% Initializes an array to hold both audio channels
frameF(1024, 2) = 0;
% Decodes audio file
huffLUT = loadLUT();
for i = 0:length(AACSeq3) - 1
currFrameStart = i * 1024 + 1;
currFrameStop = currFrameStart + 2047;
SL = decodeHuff(AACSeq3(i + 1).chl.stream, ...
AACSeq3(i + 1).chl.codebook, huffLUT);
SR = decodeHuff(AACSeq3(i + 1).chr.stream, ...
AACSeq3(i + 1).chr.codebook, huffLUT);
sfcL = decodeHuff(AACSeq3(i + 1).chl.sfc, 12, huffLUT); % TODO: maybe LUT(12);
sfcR = decodeHuff(AACSeq3(i + 1).chr.sfc, 12, huffLUT);
frameF(:, 1) = iAACquantizer(SL, sfcL, AACSeq3(i + 1).chl.G, AACSeq3(i+1).frameType);
frameF(:, 2) = iAACquantizer(SR, sfcR, AACSeq3(i + 1).chr.G, AACSeq3(i+1).frameType);
TNScoeffsL = AACSeq3(i + 1).chl.TNScoeffs;
TNScoeffsR = AACSeq3(i + 1).chr.TNScoeffs;
frameF(:, 1) = iTNS(frameF(:, 1), AACSeq3(i+1).frameType, TNScoeffsL);
frameF(:, 2) = iTNS(frameF(:, 2), AACSeq3(i+1).frameType, TNScoeffsR);
frameT = iFilterbank(frameF, AACSeq3(i+1).frameType, AACSeq3(i+1).winType);
decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT;
end
audiowrite(fNameOut, decodedAudio, 48000);
x = decodedAudio;
end

24
Level_3/iAACquantizer.m

@ -0,0 +1,24 @@
function frameF = iAACquantizer(S, sfc, G, frameType)
%Implementation of the inverse Quantizer
% Usage frameF = iAACquantizer(S, sfc, G, frameType), where:
% Inputs
% - S are the MDCT quantization symbols of one audio channel stored
% in a vector of length 1024
% - sfc are the scalefactors per band stored in an array of
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1
% otherwise, where NB is the number of bands
% - G is the global gain stored in an array of dimensions 1X8 for
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
%
% Output
% - frameF is the frame in the frequency domain, in MDCT coefficients
% representation containing only one of the audio channels stored in
% a vector of length 1024
end

99
Level_3/iFilterbank.m

@ -0,0 +1,99 @@
function frameT = iFilterbank(frameF, frameType, winType)
%Implementation of the Inverse Filter Bank step
% Usage frameT = iFilterbank(frameF, frameType, winType), where:
% Inputs
% - frameF is the frame in the frequency domain, in MDCT coefficients
% representation containing both channels of the audio stored in an
% array of dimensions 1024X2
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% - winType is the type of the window selected, can be one of "KBD",
% "SIN"
%
% Output
% - frameT is a frame in the time domain, containing both channels of
% the audio stored in an array of dimensions 2048X2
% Declares persistent windows variables and initializes if empty
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort;
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ...
isempty(sinWindowLong) || isempty(sinWindowShort)
kaiserLong = kaiser(1025, 6*pi);
kaiserSumLong = sum(kaiserLong);
kaiserShort = kaiser(129, 4*pi);
kaiserSumShort = sum(kaiserShort);
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]);
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]);
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong);
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048);
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]);
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]);
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort);
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256);
end
% Initializes output array
frameT(2048, 2) = 0;
% Applies appropriate window to the frame
if strcmp(frameType, 'OLS')
frameT = imdct4(frameF);
if strcmp(winType, 'KBD')
frameT = bsxfun(@times, frameT, kaiserWindowLong');
elseif strcmp(winType, 'SIN')
frameT = bsxfun(@times, frameT, sinWindowLong');
else
error('filterbank, l[20]: Unsupported window type input!')
end
elseif strcmp(frameType, 'LSS')
frameT = imdct4(frameF);
if strcmp(winType, 'KBD')
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)');
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)');
frameT(1601:end, :) = 0;
elseif strcmp(winType, 'SIN')
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)');
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)');
frameT(1601:end, :) = 0;
else
error('filterbank, l[20]: Unsupported window type input!')
end
elseif strcmp(frameType, 'LPS')
frameT = imdct4(frameF);
if strcmp(winType, 'KBD')
frameT(1:448, :) = 0;
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)');
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)');
elseif strcmp(winType, 'SIN')
frameT(1:448, :) = 0;
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)');
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)');
else
error('filterbank, l[20]: Unsupported window type input!')
end
elseif strcmp(frameType, 'ESH')
for channel=1:2
for subFrameIndex = 1:8
subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel));
if strcmp(winType, 'KBD')
subFrame = subFrame .* kaiserWindowShort';
elseif strcmp(winType, 'SIN')
subFrame = subFrame .* sinWindowShort';
end
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) = ...
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) + subFrame;
end
end
end
end

38
Level_3/iTNS.m

@ -0,0 +1,38 @@
function frameFout = iTNS(frameFin, frameType, TNScoeffs)
%Implementation of the reverse TNS step
% Usage frameFout = iTNS(frameFin, frameType, TNScoeffs), where:
% Inputs
% - frameFin is the frame in the frequency domain, in MDCT coefficients
% representation containing both channels of the audio stored in an
% array of dimensions 1024X2
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% - TNScoeffs is the quantized TNS coefficients array of dimensions
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise
%
% Output
% - frameFout is the frame in the frequency domain after Temporal Noise
% Shaping, in MDCT coefficients representation containing both channels
% of the audio stored in an array of dimensions 1024X2
% Declares persistent variable holding the TNS tables and initializes if empty
persistent TNSTables;
if isempty(TNSTables)
TNSTables = load('TableB219.mat');
end
if ~strcmp(frameType, 'ESH')
% Inverses MDCT coefficients filtering
frameFout = filter(1, [1 TNScoeffs], frameFin);
else
for subFrameIndex = 1:8
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128);
% Inverses MDCT coefficients filtering
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ...
filter(1, [1 TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4)], subFrame);
end
end
end

61
Level_3/imdct4.m

@ -0,0 +1,61 @@
function y = imdct4(x)
% IMDCT4 Calculates the Modified Discrete Cosine Transform
% y = imdct4(x)
%
% x: input signal (can be either a column or frame per column)
% y: IMDCT of x
%
% Vectorize ! ! !
% ------- imdct4.m -----------------------------------------
% Marios Athineos, marios@ee.columbia.edu
% http://www.ee.columbia.edu/~marios/
% Copyright (c) 2002 by Columbia University.
% All rights reserved.
% ----------------------------------------------------------
[flen,fnum] = size(x);
% Make column if it's a single row
if (flen==1)
x = x(:);
flen = fnum;
fnum = 1;
end
% We need these for furmulas below
N = flen;
M = N/2;
twoN = 2*N;
sqrtN = sqrt(twoN);
% We need this twice so keep it around
t = (0:(M-1)).';
w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN)));
% Pre-twiddle
t = (0:(M-1)).';
c = x(2*t+1,:) + j*x(N-1-2*t+1,:);
c = (0.5*w)*c;
% FFT for N/2 points only !!!
c = fft(c,M);
% Post-twiddle
c = ((8/sqrtN)*w)*c;
% Preallocate rotation matrix
rot = zeros(twoN,fnum);
% Sort
t = (0:(M-1)).';
rot(2*t+1,:) = real(c(t+1,:));
rot(N+2*t+1,:) = imag(c(t+1,:));
t = (1:2:(twoN-1)).';
rot(t+1,:) = -rot(twoN-1-t+1,:);
% Shift
t = (0:(3*M-1)).';
y(t+1,:) = rot(t+M+1,:);
t = (3*M:(twoN-1)).';
y(t+1,:) = -rot(t-3*M+1,:);

50
Level_3/loadLUT.m

@ -0,0 +1,50 @@
function huffLUT = loadLUT()
load huffCodebooks
load huffCodebookSF
huffCodebooks{end + 1} = huffCodebookSF;
for i=1:12
h=huffCodebooks{i}(:,3);
hlength=huffCodebooks{i}(:,2);
for j=1:length(h)
hbin{j}=dec2bin(h(j),hlength(j));
end
invTable{i}=vlcTable(hbin);
clear hbin;
end
huffLUT{1}=struct('LUT',huffCodebooks{1},'invTable',invTable{1},'codebook',1,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1);
huffLUT{2}=struct('LUT',huffCodebooks{2},'invTable',invTable{2},'codebook',2,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1);
huffLUT{3}=struct('LUT',huffCodebooks{3},'invTable',invTable{3},'codebook',3,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0);
huffLUT{4}=struct('LUT',huffCodebooks{4},'invTable',invTable{4},'codebook',4,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0);
huffLUT{5}=struct('LUT',huffCodebooks{5},'invTable',invTable{5},'codebook',5,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1);
huffLUT{6}=struct('LUT',huffCodebooks{6},'invTable',invTable{6},'codebook',6,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1);
huffLUT{7}=struct('LUT',huffCodebooks{7},'invTable',invTable{7},'codebook',7,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0);
huffLUT{8}=struct('LUT',huffCodebooks{8},'invTable',invTable{8},'codebook',8,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0);
huffLUT{9}=struct('LUT',huffCodebooks{9},'invTable',invTable{9},'codebook',9,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0);
huffLUT{10}=struct('LUT',huffCodebooks{10},'invTable',invTable{10},'codebook',10,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0);
huffLUT{11}=struct('LUT',huffCodebooks{11},'invTable',invTable{11},'codebook',11,'nTupleSize',2,'maxAbsCodeVal',16,'signedValues',0);
huffLUT{12}=struct('LUT',huffCodebooks{12},'invTable',invTable{12},'codebook',12,'nTupleSize',1,'maxAbsCodeVal',60,'signedValues',1);
function [h]=vlcTable(codeArray)
% Generates the inverse variable length coding tree, stored in a table. It
% is used for decoding.
%
% codeArray: the input matrix, in the form of a cell array, with each cell
% containing codewords represented in string format (array of chars)
h=zeros(1,3);
for codeIndex=1:length(codeArray)
word=str2num([codeArray{codeIndex}]')';
hIndex=1;
hLength=size(h,1);
for i=1:length(word)
k=word(i)+1;
if(~h(hIndex,k))
h(hIndex,k)=hLength+1;
hIndex=hLength+1;
h(hIndex,1:2)=zeros(1,2);
hLength=hLength+1;
else
hIndex=h(hIndex,k);
end
end
h(hIndex,3)=codeIndex;
end

75
Level_3/mdct4.m

@ -0,0 +1,75 @@
function y = mdct4(x)
% MDCT4 Calculates the Modified Discrete Cosine Transform
% y = mdct4(x)
%
% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with
% 50% overlap for perfect TDAC reconstruction.
% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full
% matrix (N) of coefs is: yf = [y;-flipud(y)];
%
% x: input signal (can be either a column or frame per column)
% length of x must be a integer multiple of 4 (each frame)
% y: MDCT of x (coefs are divided by sqrt(N))
%
% Vectorize ! ! !
% ------- mdct4.m ------------------------------------------
% Marios Athineos, marios@ee.columbia.edu
% http://www.ee.columbia.edu/~marios/
% Copyright (c) 2002 by Columbia University.
% All rights reserved.
% ----------------------------------------------------------
[flen,fnum] = size(x);
% Make column if it's a single row
if (flen==1)
x = x(:);
flen = fnum;
fnum = 1;
end
% Make sure length is multiple of 4
if (rem(flen,4)~=0)
error('MDCT4 defined for lengths multiple of four.');
end
% We need these for furmulas below
N = flen; % Length of window
M = N/2; % Number of coefficients
N4 = N/4; % Simplify the way eqs look
sqrtN = sqrt(N);
% Preallocate rotation matrix
% It would be nice to be able to do it in-place but we cannot
% cause of the prerotation.
rot = zeros(flen,fnum);
% Shift
t = (0:(N4-1)).';
rot(t+1,:) = -x(t+3*N4+1,:);
t = (N4:(N-1)).';
rot(t+1,:) = x(t-N4+1,:);
clear x;
% We need this twice so keep it around
t = (0:(N4-1)).';
w = diag(sparse(exp(-j*2*pi*(t+1/8)/N)));
% Pre-twiddle
t = (0:(N4-1)).';
c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))...
-j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:));
% This is a really cool Matlab trick ;)
c = 0.5*w*c;
clear rot;
% FFT for N/4 points only !!!
c = fft(c,N4);
% Post-twiddle
c = (2/sqrtN)*w*c;
% Sort
t = (0:(N4-1)).';
y(2*t+1,:) = real(c(t+1,:));
y(M-1-2*t+1,:) = -imag(c(t+1,:));

23
Level_3/psycho.m

@ -0,0 +1,23 @@
function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
%Implementation of Psychoacoustic Model
% Usage SMR = psycho(frameT, frameType, frameTprev1, frameTprev2), where:
% Inputs
% - frameT is a frame in the time domain, containing only one of the
% audio channels stored in a vector of length 2048
% - frameType is the type of the current frame in string
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
% (LONG_STOP_SEQUENCE)
% - frameTprev1 is the previous frame in the time domain, containing
% only one of the audio channels stored in a vector of length 2048
% - frameTprev2 is the frame before frameTprev1 in the time domain,
% containing only one of the audio channels stored in a vector of
% length 2048
%
% Output
% - SMR is the signal to mask ratio array of dimensions 42X8 for
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise
end
Loading…
Cancel
Save