Apostolos Fanakis
6 years ago
20 changed files with 1218 additions and 0 deletions
@ -0,0 +1,123 @@ |
|||||
|
function AACSeq3 = AACoder3(fNameIn, fnameAACoded) |
||||
|
%Implementation of AAC encoder |
||||
|
% Usage AACSeq3 = AACoder3(fNameIn, fnameAACoded), where: |
||||
|
% Inputs |
||||
|
% - fNameIn is the filename and path of the file to encode |
||||
|
% - frameAACoded is the filename and path of the mat file that will |
||||
|
% be written after encoding |
||||
|
% |
||||
|
% Output |
||||
|
% - AACSeq3 is an array of structs containing K structs, where K is |
||||
|
% the number of computed frames. Every struct of the array consists |
||||
|
% of: |
||||
|
% * a frameType, |
||||
|
% * a winType, |
||||
|
% * chl.TNScoeffs which are the quantized TNS coefficients of |
||||
|
% this frame's left channel, |
||||
|
% * chr.TNScoeffs which are the quantized TNS coefficients of |
||||
|
% this frame's right channel, |
||||
|
% * chl.T which are the psychoacoustic thresholds of this frame's |
||||
|
% left channel, |
||||
|
% * chr.T which are the psychoacoustic thresholds of this frame's |
||||
|
% right channel, |
||||
|
% * chl.G which are the quantized global gains of this frame's |
||||
|
% left channel, |
||||
|
% * chr.G which are the quantized global gains of this frame's |
||||
|
% right channel, |
||||
|
% * chl.sfc which is the Huffman encoded sfc sequence of this |
||||
|
% frame's left channel, |
||||
|
% * chr.sfc which is the Huffman encoded sfc sequence of this |
||||
|
% frame's right channel, |
||||
|
% * chl.stream which is the Huffman encoded quantized MDCT |
||||
|
% sequence of this frame's left channel, |
||||
|
% * chr.stream which is the Huffman encoded quantized MDCT |
||||
|
% sequence of this frame's right channel, |
||||
|
% * chl.codebook which is the Huffman codebook used for this |
||||
|
% frame's left channel |
||||
|
% * chr.codebook which is the Huffman codebook used for this |
||||
|
% frame's right channel |
||||
|
|
||||
|
% Declares constant window type |
||||
|
WINDOW_TYPE = 'SIN'; |
||||
|
|
||||
|
% Reads the audio file |
||||
|
[originalAudioData, ~] = audioread(fNameIn); |
||||
|
|
||||
|
% Splits the audio in frames and determines the type of each frame |
||||
|
frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS'; |
||||
|
frameTypes{1} = 'OLS'; |
||||
|
for i = 1:length(frameTypes) - 2 |
||||
|
nextFrameStart = (i + 1) * 1024 + 1; |
||||
|
nextFrameStop = nextFrameStart + 2047; |
||||
|
frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i}); |
||||
|
end |
||||
|
|
||||
|
% Assigns a type to the last frame |
||||
|
if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS') |
||||
|
frameTypes{length(frameTypes)} = 'ESH'; |
||||
|
elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH') |
||||
|
frameTypes{length(frameTypes)} = 'ESH'; |
||||
|
else |
||||
|
frameTypes{length(frameTypes)} = 'OLS'; |
||||
|
end |
||||
|
|
||||
|
% Encodes audio file |
||||
|
huffLUT = loadLUT(); |
||||
|
AACSeq3(length(frameTypes)) = struct; |
||||
|
for i = 0:length(frameTypes) - 1 |
||||
|
currFrameStart = i * 1024 + 1; |
||||
|
currFrameStop = currFrameStart + 2047; |
||||
|
frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, WINDOW_TYPE); |
||||
|
[frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameTypes{i+1}); |
||||
|
[frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameTypes{i+1}); |
||||
|
|
||||
|
if i < 2 |
||||
|
% TODO: what happens on the first two frames? |
||||
|
else |
||||
|
prev1FrameStart = (i - 1) * 1024 + 1; |
||||
|
prev1FrameStop = prev1FrameStart + 2047; |
||||
|
prev2FrameStart = (i - 2) * 1024 + 1; |
||||
|
prev2FrameStop = prev1FrameStart + 2047; |
||||
|
SMR = psycho(... |
||||
|
originalAudioData(currFrameStart:currFrameStop, :), ... |
||||
|
frameTypes{i+1}, ... |
||||
|
originalAudioData(prev1FrameStart:prev1FrameStop, :), ... |
||||
|
originalAudioData(prev2FrameStart:prev2FrameStop, :)); |
||||
|
[SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMR); |
||||
|
[SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMR); |
||||
|
end |
||||
|
|
||||
|
[streamL, huffcodebookL] = encodeHuff(SL, huffLUT); |
||||
|
[streamR, huffcodebookR] = encodeHuff(SR, huffLUT); |
||||
|
[sfcL, ~] = encodeHuff(sfcL, huffLUT, 12); |
||||
|
[sfcR, ~] = encodeHuff(sfcR, huffLUT, 12); |
||||
|
|
||||
|
AACSeq3(i + 1).frameType = frameTypes(i + 1); |
||||
|
AACSeq3(i + 1).winType = WINDOW_TYPE; |
||||
|
AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL; |
||||
|
AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR; |
||||
|
AACSeq3(i + 1).chl.T = what; % TODO: find dis shit |
||||
|
AACSeq3(i + 1).chr.T = what; |
||||
|
AACSeq3(i + 1).chl.G = GL; |
||||
|
AACSeq3(i + 1).chr.G = GR; |
||||
|
AACSeq3(i + 1).chl.sfc = sfcL; |
||||
|
AACSeq3(i + 1).chr.sfc = sfcR; |
||||
|
AACSeq3(i + 1).chl.stream = streamL; |
||||
|
AACSeq3(i + 1).chr.stream = streamR; |
||||
|
AACSeq3(i + 1).chl.codebook = huffcodebookL; |
||||
|
AACSeq3(i + 1).chr.codebook = huffcodebookR; |
||||
|
end |
||||
|
|
||||
|
save(fnameAACoded,AACSeq3); |
||||
|
|
||||
|
if false |
||||
|
[idx,label] = grp2idx(sort(frameTypes)); |
||||
|
hist(idx,unique(idx)); |
||||
|
set(gca,'xTickLabel',label) |
||||
|
|
||||
|
sum(idx(:) == 1) |
||||
|
sum(idx(:) == 2) |
||||
|
sum(idx(:) == 3) |
||||
|
sum(idx(:) == 4) |
||||
|
end |
||||
|
end |
@ -0,0 +1,26 @@ |
|||||
|
function [S, sfc, G] = AACquantizer(frameF, frameType, SMR) |
||||
|
%Implementation of Quantizer |
||||
|
% Usage [S, sfc, G] = AACquantizer(frameF, frameType, SMR), where: |
||||
|
% Inputs |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing only one of the audio channels stored in |
||||
|
% a vector of length 1024 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - SMR is the signal to mask ratio array of dimensions 42X8 for |
||||
|
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise |
||||
|
% |
||||
|
% Output |
||||
|
% - S are the MDCT quantization symbols of one audio channel stored |
||||
|
% in a vector of length 1024 |
||||
|
% - sfc are the scalefactors per band stored in an array of |
||||
|
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 |
||||
|
% otherwise, where NB is the number of bands |
||||
|
% - G is the global gain stored in an array of dimensions 1X8 for |
||||
|
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise |
||||
|
|
||||
|
|
||||
|
end |
||||
|
|
Binary file not shown.
@ -0,0 +1,78 @@ |
|||||
|
function frameType = SSC(~, nextFrameT, prevFrameType) |
||||
|
%Implementation of the Sequence Segmentation Control step |
||||
|
% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where: |
||||
|
% Inputs |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
% - nextFrameT is the next frame in the time domain, containing both |
||||
|
% channels of the audio stored in an array of dimensions 2048X2 |
||||
|
% - prevFrameType is the type of the previous frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% |
||||
|
% Output |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
|
||||
|
% Examines the cases where the determination of the type of the next |
||||
|
% frame isn't needed for |
||||
|
if strcmp(prevFrameType, 'LSS') |
||||
|
frameType = 'ESH'; |
||||
|
return; |
||||
|
elseif strcmp(prevFrameType, 'LPS') |
||||
|
frameType = 'OLS'; |
||||
|
return; |
||||
|
end |
||||
|
|
||||
|
% Determines the type of the next frame |
||||
|
% Filters frame |
||||
|
nextFrameT = filter([0.7548 -0.7548], [1 -0.5095], nextFrameT, [], 1); |
||||
|
channelFrameType = {'nan', 'nan'}; |
||||
|
for channel = 1:2 |
||||
|
% Calculates sub-frame energy estimation |
||||
|
[subFrames, ~] = buffer(nextFrameT(577:end - 448, channel), 128, 0, 'nodelay'); |
||||
|
energyEstimations = sum(subFrames .^ 2, 1); |
||||
|
|
||||
|
% Calculates the ratio of the sub-frame energy to the average energy of |
||||
|
% the previous sub-frames |
||||
|
energyRatios = movmean(energyEstimations, [8 0]); |
||||
|
energyRatios = energyEstimations ./ [energyEstimations(1) energyRatios(1:end-1)]; |
||||
|
|
||||
|
if ~isempty(find(energyEstimations > 10^(-3), 1)) && ... |
||||
|
~isempty(find(energyRatios(energyEstimations > 10^(-3)) > 10, 1)) |
||||
|
% Next frame is ESH |
||||
|
if strcmp(prevFrameType, 'ESH') |
||||
|
% This frame of this channel is an EIGHT_SHORT_SEQUENCE type |
||||
|
% frame. This means the frames of both channels will be encoded |
||||
|
% as EIGHT_SHORT_SEQUENCE type frames. |
||||
|
frameType = 'ESH'; |
||||
|
return; |
||||
|
elseif strcmp(prevFrameType, 'OLS') |
||||
|
channelFrameType{channel} = 'LSS'; |
||||
|
end |
||||
|
else |
||||
|
if strcmp(prevFrameType, 'ESH') |
||||
|
channelFrameType{channel} = 'LPS'; |
||||
|
elseif strcmp(prevFrameType, 'OLS') |
||||
|
channelFrameType{channel} = 'OLS'; |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
% Joins decision for both channels |
||||
|
if strcmp(channelFrameType{1}, 'nan') || strcmp(channelFrameType{2}, 'nan') |
||||
|
error('SSC, l[73]: Internal error occurred!') |
||||
|
end |
||||
|
if strcmp(channelFrameType{1}, 'OLS') |
||||
|
frameType = channelFrameType{2}; |
||||
|
elseif strcmp(channelFrameType{2}, 'OLS') |
||||
|
frameType = channelFrameType{1}; |
||||
|
elseif strcmp(channelFrameType{1}, channelFrameType{2}) |
||||
|
frameType = channelFrameType{1}; |
||||
|
else |
||||
|
frameType = 'ESH'; |
||||
|
end |
||||
|
end |
@ -0,0 +1,119 @@ |
|||||
|
function [frameFout, TNScoeffs] = TNS(frameFin, frameType) |
||||
|
%Implementation of the TNS step |
||||
|
% Usage [frameFout, TNScoeffs] = TNS(frameFin, frameType), where: |
||||
|
% Inputs |
||||
|
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% |
||||
|
% Output |
||||
|
% - frameFout is the frame in the frequency domain after Temporal Noise |
||||
|
% Shaping, in MDCT coefficients representation containing both channels |
||||
|
% of the audio stored in an array of dimensions 1024X2 |
||||
|
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
||||
|
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
||||
|
|
||||
|
% Declares constant numbers of bands for long and short windows |
||||
|
LONG_WINDOW_NUMBER_OF_BANDS = 69; |
||||
|
SHORT_WINDOW_NUMBER_OF_BANDS = 42; |
||||
|
% Declares constant order of the linear prediction filter |
||||
|
LPF_ORDER = 4; |
||||
|
% Declares constant coefficients' resolution, in the form of number of |
||||
|
% decimal digits used |
||||
|
COEF_RES = 1; |
||||
|
|
||||
|
% Declares persistent variable holding the TNS tables and initializes if empty |
||||
|
persistent TNSTables; |
||||
|
if isempty(TNSTables) |
||||
|
TNSTables = load('TableB219.mat'); |
||||
|
end |
||||
|
|
||||
|
if ~strcmp(frameType, 'ESH') |
||||
|
% Calculates the energy per band |
||||
|
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0; |
||||
|
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS - 1 |
||||
|
bandEnergy(band) = sumsqr(frameFin(TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band + 1, 2))); |
||||
|
end |
||||
|
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = sumsqr(frameFin( ... |
||||
|
TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); |
||||
|
|
||||
|
% Calculates the normalization factors |
||||
|
bandIndices = quantiz(0:1023, TNSTables.B219a(:, 2) - 1); |
||||
|
normalizationFactor = sqrt(bandEnergy(bandIndices)); |
||||
|
|
||||
|
% Smooths normalization factors |
||||
|
for normIndex = 1023:-1:1 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
||||
|
end |
||||
|
for normIndex = 2:1024 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
||||
|
end |
||||
|
|
||||
|
% Normalizes MDCT coefficients according to each band energy |
||||
|
normalizedFrameFin = frameFin ./ normalizationFactor'; |
||||
|
|
||||
|
% Calculates the linear prediction coefficients |
||||
|
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
||||
|
|
||||
|
% Quantizes these coefficients |
||||
|
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); |
||||
|
|
||||
|
% Filters MDCT coefficients |
||||
|
if ~isstable(1, quantizedLinPredCoeff) |
||||
|
error('TNS, l[79]: Inverse filter not stable!'); |
||||
|
else |
||||
|
TNScoeffs = quantizedLinPredCoeff(2:end); |
||||
|
frameFout = filter(quantizedLinPredCoeff, 1, frameFin); |
||||
|
end |
||||
|
else |
||||
|
% Initializes output vectors |
||||
|
TNScoeffs(LPF_ORDER * 8) = 0; |
||||
|
frameFout(1024) = 0; |
||||
|
|
||||
|
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = 0; |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
||||
|
|
||||
|
% Calculates the energy per band |
||||
|
for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS - 1 |
||||
|
bandEnergy(band) = sumsqr(subFrame(TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band + 1, 2))); |
||||
|
end |
||||
|
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = sumsqr(subFrame( ... |
||||
|
TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); |
||||
|
|
||||
|
% Calculates the normalization factors |
||||
|
bandIndices = quantiz(0:127, TNSTables.B219b(:, 2) - 1); |
||||
|
normalizationFactor = sqrt(bandEnergy(bandIndices)); |
||||
|
|
||||
|
% Smooths normalization factors |
||||
|
for normIndex = 127:-1:1 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
||||
|
end |
||||
|
for normIndex = 2:128 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
||||
|
end |
||||
|
|
||||
|
% Normalizes MDCT coefficients according to each band energy |
||||
|
normalizedFrameFin = subFrame ./ normalizationFactor'; |
||||
|
|
||||
|
% Calculates the linear prediction coefficients |
||||
|
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
||||
|
|
||||
|
% Quantizes these coefficients |
||||
|
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); |
||||
|
|
||||
|
% Filters MDCT coefficients |
||||
|
if ~isstable(1, quantizedLinPredCoeff) |
||||
|
error('TNS, l[79]: Inverse filter not stable!'); |
||||
|
else |
||||
|
TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4) = quantizedLinPredCoeff(2:end); |
||||
|
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
||||
|
filter(quantizedLinPredCoeff, 1, subFrame); |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
Binary file not shown.
@ -0,0 +1,82 @@ |
|||||
|
function decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) |
||||
|
% DECODEHUFF Huffman decoder stage |
||||
|
% |
||||
|
% decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) performs huffman |
||||
|
% decoding, where huffSec is a string of '1' and '0' corresponding to the |
||||
|
% Huffman encoded stream, and huffCodebook is the index (0 to 12) of the |
||||
|
% codebook used, as outputted by encodeHuff. huffLUT is the Huffman |
||||
|
% look-up tables to be loaded using loadLUT.m. The output decCoeffs is |
||||
|
% the decoded quantised (integer) values. |
||||
|
% |
||||
|
% CAUTION: due to zero padding the length of decCoeffs may be larger than |
||||
|
% the length of the encoded sequence. Simply ignore values (they should |
||||
|
% be equal to zero) that are outside the index range |
||||
|
% [1:length(encoded sequence)]. |
||||
|
|
||||
|
huffLUTi = huffLUT{huffCodebook}; |
||||
|
h=huffLUTi.invTable; |
||||
|
huffCodebook=huffLUTi.codebook; |
||||
|
nTupleSize=huffLUTi.nTupleSize; |
||||
|
maxAbsCodeVal=huffLUTi.maxAbsCodeVal; |
||||
|
signedValues=huffLUTi.signedValues; |
||||
|
eos=0;%end of stream |
||||
|
decCoeffs=[]; |
||||
|
|
||||
|
huffSec=huffSec-48; |
||||
|
streamIndex=1; |
||||
|
while(~eos)%end of stream |
||||
|
huffFailure=0; |
||||
|
wordbit=0; |
||||
|
r=1;%row indicator |
||||
|
b=huffSec(streamIndex+wordbit); |
||||
|
|
||||
|
%decoding tuple according to inverse matrix |
||||
|
while(1) |
||||
|
b=huffSec(streamIndex+wordbit); |
||||
|
wordbit=wordbit+1; |
||||
|
rOld=r; |
||||
|
r=h(rOld,b+1);%new index |
||||
|
if((h(r,1)==0)&(h(r,2)==0))%reached a leaf (found a valid word) |
||||
|
symbolIndex=h(r,3)-1;%The actual index begins from zero. |
||||
|
streamIndex=streamIndex+wordbit; |
||||
|
break; |
||||
|
end |
||||
|
end |
||||
|
%decoding nTuple magnitudes |
||||
|
if(signedValues) |
||||
|
base=2*maxAbsCodeVal+1; |
||||
|
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); |
||||
|
nTupleDec=nTupleDec-maxAbsCodeVal; |
||||
|
else |
||||
|
base=maxAbsCodeVal+1; |
||||
|
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); |
||||
|
nTupleSignBits=huffSec(streamIndex:streamIndex+nTupleSize-1); |
||||
|
nTupleSign=-sign(nTupleSignBits(:)'-0.5); |
||||
|
streamIndex=streamIndex+nTupleSize; |
||||
|
nTupleDec=nTupleDec.*nTupleSign; |
||||
|
end |
||||
|
escIndex=find(abs(nTupleDec)==16); |
||||
|
if((huffCodebook==11)&&(sum(escIndex))) |
||||
|
for i=1:length(escIndex) |
||||
|
b=huffSec(streamIndex); |
||||
|
N=0; |
||||
|
%reading the escape seq, counting N '1's |
||||
|
while(b) |
||||
|
N=N+1; |
||||
|
b=huffSec(streamIndex+N); |
||||
|
end%last bit read was a zero corresponding to esc_separator.. |
||||
|
streamIndex=streamIndex+N; |
||||
|
%reading the next N+4 bits |
||||
|
N4=N+4; |
||||
|
escape_word=huffSec(streamIndex+1:streamIndex+N4); |
||||
|
nTupleDec(escIndex(i))=2^N4+bin2dec(num2str(escape_word)); |
||||
|
streamIndex=streamIndex+N4+1; |
||||
|
end |
||||
|
nTupleDec(escIndex)=nTupleDec(escIndex).*nTupleSign(escIndex); |
||||
|
end |
||||
|
decCoeffs=[decCoeffs nTupleDec]; |
||||
|
if (streamIndex>length(huffSec)) |
||||
|
eos=1; |
||||
|
end |
||||
|
|
||||
|
end |
@ -0,0 +1,53 @@ |
|||||
|
function [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded) |
||||
|
%Function that demonstrates usage of the level 3 code |
||||
|
% Usage [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded), where: |
||||
|
% Inputs |
||||
|
% - fNameIn is the filename and path of the file to encode |
||||
|
% - fNameOut is the filename and path of the wav file that will be |
||||
|
% written after decoding |
||||
|
% - frameAACoded is the filename and path of the mat file that will |
||||
|
% be written after encoding |
||||
|
% |
||||
|
% Output |
||||
|
% - SNR is the signal to noise ration computed after successively |
||||
|
% encoding and decoding the audio signal |
||||
|
% - bitrate is the bits per second |
||||
|
% - compression is the ratio of the bitrate before the encoding to |
||||
|
% the bitrate after it |
||||
|
|
||||
|
AACSeq3 = AACoder3(fNameIn, frameAACoded); |
||||
|
decodedAudio = iAACoder3(AACSeq3, fNameOut); |
||||
|
|
||||
|
[audioData, ~] = audioread(fNameIn); |
||||
|
|
||||
|
% figure() |
||||
|
% plot(audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) |
||||
|
% for frame = 1:length(AACSeq1) |
||||
|
% if strcmp(AACSeq1(frame).frameType, 'LSS') || strcmp(AACSeq1(frame).frameType, 'LPS') |
||||
|
% % Add lines |
||||
|
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); |
||||
|
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); |
||||
|
% % Set properties of lines |
||||
|
% set([h1 h2],'Color','y','LineWidth',2) |
||||
|
% % Add a patch |
||||
|
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'y') |
||||
|
% elseif strcmp(AACSeq1(frame).frameType, 'ESH') |
||||
|
% % Add lines |
||||
|
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); |
||||
|
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); |
||||
|
% % Set properties of lines |
||||
|
% set([h1 h2],'Color','r','LineWidth',2) |
||||
|
% % Add a patch |
||||
|
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'r') |
||||
|
% end |
||||
|
% end |
||||
|
% set(gca,'children',flipud(get(gca,'children'))) |
||||
|
% |
||||
|
% figure() |
||||
|
% plot(audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) |
||||
|
|
||||
|
snr(audioData(1025:length(decodedAudio)-1024, 1), audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) |
||||
|
snr(audioData(1025:length(decodedAudio)-1024, 2), audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) |
||||
|
SNR = 10*log10((sum(audioData(1025:length(decodedAudio)-1024, :)) .^ 2) ./ ... |
||||
|
(sum(audioData(1025:length(decodedAudio)-1024, :) - decodedAudio(1025:end-1024, :)) .^ 2)); |
||||
|
end |
@ -0,0 +1,194 @@ |
|||||
|
function [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) |
||||
|
% ENCODEHUFF Huffman encoder stage |
||||
|
% |
||||
|
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT) performs |
||||
|
% huffman coding for quantised (integer) values of a section coeffSec; |
||||
|
% huffLUT are the Huffman look-up tables to be loaded using loadLUT.m |
||||
|
% |
||||
|
% It returns huffSec, a string of '1' and '0' corresponding to the |
||||
|
% Huffman encoded stream, and huffCodebook, the number of the Huffman |
||||
|
% codebook used (see page 147 in w2203tfa.pdf and pages 82-94 in |
||||
|
% w2203tft.pdf). |
||||
|
% |
||||
|
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) |
||||
|
% forces the codebook forcedCodebook to be used. |
||||
|
|
||||
|
if nargin == 2 |
||||
|
[huffSec, huffCodebook] = encodeHuff1(coeffSec, huffLUT); |
||||
|
else |
||||
|
huffSec = huffLUTCode1(huffLUT{forcedCodebook}, coeffSec); |
||||
|
huffCodebook = forcedCodebook; |
||||
|
end |
||||
|
|
||||
|
function [huffSec, huffCodebook]=encodeHuff1(coeffSec,huffLUT) |
||||
|
maxAbsVal=max(abs(coeffSec)); |
||||
|
ESC=0;%escape sequence used? |
||||
|
switch(maxAbsVal) |
||||
|
case 0 |
||||
|
huffCodebook=0; |
||||
|
[huffSec]=huffLUTCode0(); |
||||
|
case 1 |
||||
|
huffCodebook=[1 2]; |
||||
|
% signedValues=1; |
||||
|
% nTupleSize=4; |
||||
|
% maxAbsCodeVal=1; |
||||
|
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
||||
|
if(length(huffSec1)<=length(huffSec2)) |
||||
|
huffSec=huffSec1; |
||||
|
huffCodebook=huffCodebook(1); |
||||
|
else |
||||
|
huffSec=huffSec2; |
||||
|
huffCodebook=huffCodebook(2); |
||||
|
end |
||||
|
case 2 |
||||
|
huffCodebook=[3 4]; |
||||
|
% signedValues=0; |
||||
|
% nTupleSize=4; |
||||
|
% maxAbsCodeVal=2; |
||||
|
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
||||
|
if(length(huffSec1)<=length(huffSec2)) |
||||
|
huffSec=huffSec1; |
||||
|
huffCodebook=huffCodebook(1); |
||||
|
else |
||||
|
huffSec=huffSec2; |
||||
|
huffCodebook=huffCodebook(2); |
||||
|
end |
||||
|
case {3,4} |
||||
|
huffCodebook=[5 6]; |
||||
|
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
||||
|
if(length(huffSec1)<=length(huffSec2)) |
||||
|
huffSec=huffSec1; |
||||
|
huffCodebook=huffCodebook(1); |
||||
|
else |
||||
|
huffSec=huffSec2; |
||||
|
huffCodebook=huffCodebook(2); |
||||
|
end |
||||
|
case {5,6,7} |
||||
|
huffCodebook=[7 8]; |
||||
|
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
||||
|
if(length(huffSec1)<=length(huffSec2)) |
||||
|
huffSec=huffSec1; |
||||
|
huffCodebook=huffCodebook(1); |
||||
|
else |
||||
|
huffSec=huffSec2; |
||||
|
huffCodebook=huffCodebook(2); |
||||
|
end |
||||
|
case {8,9,10,11,12} |
||||
|
huffCodebook=[9 10]; |
||||
|
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
||||
|
if(length(huffSec1)<=length(huffSec2)) |
||||
|
huffSec=huffSec1; |
||||
|
huffCodebook=huffCodebook(1); |
||||
|
else |
||||
|
huffSec=huffSec2; |
||||
|
huffCodebook=huffCodebook(2); |
||||
|
end |
||||
|
case {13,14,15} |
||||
|
huffCodebook=11; |
||||
|
[huffSec]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
otherwise |
||||
|
huffCodebook=11; |
||||
|
[huffSec]=huffLUTCodeESC(huffLUT{huffCodebook(1)}, coeffSec); |
||||
|
end |
||||
|
|
||||
|
|
||||
|
|
||||
|
function [huffSec]=huffLUTCode1(huffLUT, coeffSec) |
||||
|
LUT=huffLUT.LUT; |
||||
|
huffCodebook=huffLUT.codebook; |
||||
|
nTupleSize=huffLUT.nTupleSize; |
||||
|
maxAbsCodeVal=huffLUT.maxAbsCodeVal; |
||||
|
signedValues=huffLUT.signedValues; |
||||
|
numTuples=ceil(length(coeffSec)/nTupleSize); |
||||
|
if(signedValues) |
||||
|
coeffSec=coeffSec+maxAbsCodeVal; |
||||
|
base=2*maxAbsCodeVal+1; |
||||
|
else |
||||
|
base=maxAbsCodeVal+1; |
||||
|
end |
||||
|
coeffSecPad=zeros(1,numTuples*nTupleSize); |
||||
|
coeffSecPad(1:length(coeffSec))=coeffSec; |
||||
|
for i=1:numTuples |
||||
|
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); |
||||
|
huffIndex=abs(nTuple)*(base.^[nTupleSize-1:-1:0])'; |
||||
|
hexHuff=LUT(huffIndex+1,3); |
||||
|
|
||||
|
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex |
||||
|
huffSecLen=LUT(huffIndex+1,2); |
||||
|
if(signedValues) |
||||
|
huffSec{i}=dec2bin(hex2dec(hexHuff),huffSecLen); |
||||
|
else |
||||
|
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))')];%appending sign |
||||
|
end |
||||
|
end |
||||
|
huffSec=strcat([huffSec{:}]); |
||||
|
|
||||
|
function [huffSec]=huffLUTCode0() |
||||
|
huffSec=''; |
||||
|
|
||||
|
function [huffSec]=huffLUTCodeESC(huffLUT, coeffSec) |
||||
|
LUT=huffLUT.LUT; |
||||
|
huffCodebook=huffLUT.codebook; |
||||
|
nTupleSize=huffLUT.nTupleSize; |
||||
|
maxAbsCodeVal=huffLUT.maxAbsCodeVal; |
||||
|
signedValues=huffLUT.signedValues; |
||||
|
|
||||
|
numTuples=ceil(length(coeffSec)/nTupleSize); |
||||
|
base=maxAbsCodeVal+1; |
||||
|
coeffSecPad=zeros(1,numTuples*nTupleSize); |
||||
|
coeffSecPad(1:length(coeffSec))=coeffSec; |
||||
|
|
||||
|
nTupleOffset=zeros(1,nTupleSize); |
||||
|
for i=1:numTuples |
||||
|
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); |
||||
|
lnTuple=nTuple; |
||||
|
lnTuple(lnTuple==0)=eps; |
||||
|
N4=max([0 0; floor(log2(abs(lnTuple)))]); |
||||
|
N=max([0 0 ; N4-4]); |
||||
|
esc=abs(nTuple)>15; |
||||
|
|
||||
|
nTupleESC=nTuple; |
||||
|
nTupleESC(esc)=sign(nTupleESC(esc))*16;%Just keep the sixteens here (nTupleESC). nTuple contains the actual values |
||||
|
|
||||
|
huffIndex=abs(nTupleESC)*(base.^[nTupleSize-1:-1:0])'; |
||||
|
hexHuff=LUT(huffIndex+1,3); |
||||
|
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex |
||||
|
huffSecLen=LUT(huffIndex+1,2); |
||||
|
|
||||
|
%Adding sufficient ones to the prefix. If N<=0 empty string created |
||||
|
escape_prefix1=''; |
||||
|
escape_prefix2=''; |
||||
|
escape_prefix1(1:N(1))='1'; |
||||
|
escape_prefix2(1:N(2))='1'; |
||||
|
|
||||
|
|
||||
|
%Calculating the escape words. Taking absolute values. Will add the |
||||
|
%sign later on |
||||
|
if esc(1) |
||||
|
escape_separator1='0'; |
||||
|
escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); |
||||
|
else |
||||
|
escape_separator1=''; |
||||
|
escape_word1=''; |
||||
|
end |
||||
|
if esc(2) |
||||
|
escape_separator2='0'; |
||||
|
escape_word2=dec2bin(abs(nTuple(2))-2^(N4(2)),N4(2)); |
||||
|
else |
||||
|
escape_separator2=''; |
||||
|
escape_word2=''; |
||||
|
end |
||||
|
|
||||
|
% escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); |
||||
|
|
||||
|
escSeq=[escape_prefix1, escape_separator1, escape_word1, escape_prefix2, escape_separator2, escape_word2]; |
||||
|
|
||||
|
%adding the sign bits and the escape sequence |
||||
|
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))'), escSeq];%appending sign |
||||
|
end |
||||
|
huffSec=strcat([huffSec{:}]); |
@ -0,0 +1,100 @@ |
|||||
|
function frameF = filterbank(frameT, frameType, winType) |
||||
|
%Implementation of the Filter Bank step |
||||
|
% Usage frameF = filterbank(frameT, frameType, winType), where: |
||||
|
% Inputs |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - winType is the type of the window selected, can be one of "KBD", |
||||
|
% "SIN" |
||||
|
% |
||||
|
% Output |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
|
||||
|
% Declares persistent windows variables and initializes if empty |
||||
|
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
||||
|
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
||||
|
isempty(sinWindowLong) || isempty(sinWindowShort) |
||||
|
kaiserLong = kaiser(1025, 6*pi); |
||||
|
kaiserSumLong = sum(kaiserLong); |
||||
|
kaiserShort = kaiser(129, 4*pi); |
||||
|
kaiserSumShort = sum(kaiserShort); |
||||
|
|
||||
|
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); |
||||
|
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); |
||||
|
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); |
||||
|
|
||||
|
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); |
||||
|
|
||||
|
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); |
||||
|
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); |
||||
|
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); |
||||
|
|
||||
|
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); |
||||
|
end |
||||
|
|
||||
|
% Initializes output array |
||||
|
frameF(1024, 2) = 0; |
||||
|
|
||||
|
% Applies appropriate window to the frame |
||||
|
if strcmp(frameType, 'OLS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT = bsxfun(@times, frameT, kaiserWindowLong'); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT = bsxfun(@times, frameT, sinWindowLong'); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF = mdct4(frameT); |
||||
|
elseif strcmp(frameType, 'LSS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); |
||||
|
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); |
||||
|
frameT(1601:end, :) = 0; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); |
||||
|
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); |
||||
|
frameT(1601:end, :) = 0; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF = mdct4(frameT); |
||||
|
elseif strcmp(frameType, 'LPS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:448, :) = 0; |
||||
|
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); |
||||
|
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:448, :) = 0; |
||||
|
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); |
||||
|
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF = mdct4(frameT); |
||||
|
elseif strcmp(frameType, 'ESH') |
||||
|
for channel = 1:2 |
||||
|
% Splits the frame into sub-frames |
||||
|
[subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay'); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
subFrames = subFrames .* repmat(sinWindowShort', [1 8]); |
||||
|
end |
||||
|
|
||||
|
for subFrameIndex = 1:8 |
||||
|
frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = ... |
||||
|
mdct4(subFrames(:, subFrameIndex)); |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
Binary file not shown.
Binary file not shown.
@ -0,0 +1,73 @@ |
|||||
|
function x = iAACoder3(AACSeq3, fNameOut) |
||||
|
%Implementation of AAC decoder |
||||
|
% Usage x = iAACoder3(AACSeq3, fNameOut), where: |
||||
|
% Inputs |
||||
|
% - fNameOut is the filename and path of the file that will be |
||||
|
% written after decoding |
||||
|
% - AACSeq3 is an array of structs containing K structs, where K is |
||||
|
% the number of computed frames. Every struct of the array consists |
||||
|
% of: |
||||
|
% * a frameType, |
||||
|
% * a winType, |
||||
|
% * chl.TNScoeffs which are the quantized TNS coefficients of |
||||
|
% this frame's left channel, |
||||
|
% * chr.TNScoeffs which are the quantized TNS coefficients of |
||||
|
% this frame's right channel, |
||||
|
% * chl.T which are the psychoacoustic thresholds of this frame's |
||||
|
% left channel, |
||||
|
% * chr.T which are the psychoacoustic thresholds of this frame's |
||||
|
% right channel, |
||||
|
% * chl.G which are the quantized global gains of this frame's |
||||
|
% left channel, |
||||
|
% * chr.G which are the quantized global gains of this frame's |
||||
|
% right channel, |
||||
|
% * chl.sfc which is the Huffman encoded sfc sequence of this |
||||
|
% frame's left channel, |
||||
|
% * chr.sfc which is the Huffman encoded sfc sequence of this |
||||
|
% frame's right channel, |
||||
|
% * chl.stream which is the Huffman encoded quantized MDCT |
||||
|
% sequence of this frame's left channel, |
||||
|
% * chr.stream which is the Huffman encoded quantized MDCT |
||||
|
% sequence of this frame's right channel, |
||||
|
% * chl.codebook which is the Huffman codebook used for this |
||||
|
% frame's left channel |
||||
|
% * chr.codebook which is the Huffman codebook used for this |
||||
|
% frame's right channel |
||||
|
% |
||||
|
% Output |
||||
|
% - x is an array containing the decoded audio samples |
||||
|
|
||||
|
% Initializes an array to hold the decoded samples |
||||
|
decodedAudio(1024 * (length(AACSeq3) + 1), 2) = 0; |
||||
|
% Initializes an array to hold both audio channels |
||||
|
frameF(1024, 2) = 0; |
||||
|
|
||||
|
% Decodes audio file |
||||
|
huffLUT = loadLUT(); |
||||
|
for i = 0:length(AACSeq3) - 1 |
||||
|
currFrameStart = i * 1024 + 1; |
||||
|
currFrameStop = currFrameStart + 2047; |
||||
|
|
||||
|
SL = decodeHuff(AACSeq3(i + 1).chl.stream, ... |
||||
|
AACSeq3(i + 1).chl.codebook, huffLUT); |
||||
|
SR = decodeHuff(AACSeq3(i + 1).chr.stream, ... |
||||
|
AACSeq3(i + 1).chr.codebook, huffLUT); |
||||
|
|
||||
|
sfcL = decodeHuff(AACSeq3(i + 1).chl.sfc, 12, huffLUT); % TODO: maybe LUT(12); |
||||
|
sfcR = decodeHuff(AACSeq3(i + 1).chr.sfc, 12, huffLUT); |
||||
|
|
||||
|
frameF(:, 1) = iAACquantizer(SL, sfcL, AACSeq3(i + 1).chl.G, AACSeq3(i+1).frameType); |
||||
|
frameF(:, 2) = iAACquantizer(SR, sfcR, AACSeq3(i + 1).chr.G, AACSeq3(i+1).frameType); |
||||
|
|
||||
|
TNScoeffsL = AACSeq3(i + 1).chl.TNScoeffs; |
||||
|
TNScoeffsR = AACSeq3(i + 1).chr.TNScoeffs; |
||||
|
frameF(:, 1) = iTNS(frameF(:, 1), AACSeq3(i+1).frameType, TNScoeffsL); |
||||
|
frameF(:, 2) = iTNS(frameF(:, 2), AACSeq3(i+1).frameType, TNScoeffsR); |
||||
|
frameT = iFilterbank(frameF, AACSeq3(i+1).frameType, AACSeq3(i+1).winType); |
||||
|
|
||||
|
decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT; |
||||
|
end |
||||
|
|
||||
|
audiowrite(fNameOut, decodedAudio, 48000); |
||||
|
x = decodedAudio; |
||||
|
end |
@ -0,0 +1,24 @@ |
|||||
|
function frameF = iAACquantizer(S, sfc, G, frameType) |
||||
|
%Implementation of the inverse Quantizer |
||||
|
% Usage frameF = iAACquantizer(S, sfc, G, frameType), where: |
||||
|
% Inputs |
||||
|
% - S are the MDCT quantization symbols of one audio channel stored |
||||
|
% in a vector of length 1024 |
||||
|
% - sfc are the scalefactors per band stored in an array of |
||||
|
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 |
||||
|
% otherwise, where NB is the number of bands |
||||
|
% - G is the global gain stored in an array of dimensions 1X8 for |
||||
|
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% |
||||
|
% Output |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing only one of the audio channels stored in |
||||
|
% a vector of length 1024 |
||||
|
|
||||
|
|
||||
|
end |
||||
|
|
@ -0,0 +1,99 @@ |
|||||
|
function frameT = iFilterbank(frameF, frameType, winType) |
||||
|
%Implementation of the Inverse Filter Bank step |
||||
|
% Usage frameT = iFilterbank(frameF, frameType, winType), where: |
||||
|
% Inputs |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - winType is the type of the window selected, can be one of "KBD", |
||||
|
% "SIN" |
||||
|
% |
||||
|
% Output |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
|
||||
|
% Declares persistent windows variables and initializes if empty |
||||
|
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
||||
|
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
||||
|
isempty(sinWindowLong) || isempty(sinWindowShort) |
||||
|
kaiserLong = kaiser(1025, 6*pi); |
||||
|
kaiserSumLong = sum(kaiserLong); |
||||
|
kaiserShort = kaiser(129, 4*pi); |
||||
|
kaiserSumShort = sum(kaiserShort); |
||||
|
|
||||
|
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); |
||||
|
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); |
||||
|
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); |
||||
|
|
||||
|
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); |
||||
|
|
||||
|
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); |
||||
|
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); |
||||
|
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); |
||||
|
|
||||
|
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); |
||||
|
end |
||||
|
|
||||
|
% Initializes output array |
||||
|
frameT(2048, 2) = 0; |
||||
|
|
||||
|
% Applies appropriate window to the frame |
||||
|
if strcmp(frameType, 'OLS') |
||||
|
frameT = imdct4(frameF); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT = bsxfun(@times, frameT, kaiserWindowLong'); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT = bsxfun(@times, frameT, sinWindowLong'); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'LSS') |
||||
|
frameT = imdct4(frameF); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); |
||||
|
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); |
||||
|
frameT(1601:end, :) = 0; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); |
||||
|
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); |
||||
|
frameT(1601:end, :) = 0; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'LPS') |
||||
|
frameT = imdct4(frameF); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:448, :) = 0; |
||||
|
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); |
||||
|
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:448, :) = 0; |
||||
|
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); |
||||
|
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'ESH') |
||||
|
for channel=1:2 |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel)); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
subFrame = subFrame .* kaiserWindowShort'; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
subFrame = subFrame .* sinWindowShort'; |
||||
|
end |
||||
|
|
||||
|
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) = ... |
||||
|
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) + subFrame; |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
@ -0,0 +1,38 @@ |
|||||
|
function frameFout = iTNS(frameFin, frameType, TNScoeffs) |
||||
|
%Implementation of the reverse TNS step |
||||
|
% Usage frameFout = iTNS(frameFin, frameType, TNScoeffs), where: |
||||
|
% Inputs |
||||
|
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
||||
|
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
||||
|
% |
||||
|
% Output |
||||
|
% - frameFout is the frame in the frequency domain after Temporal Noise |
||||
|
% Shaping, in MDCT coefficients representation containing both channels |
||||
|
% of the audio stored in an array of dimensions 1024X2 |
||||
|
|
||||
|
% Declares persistent variable holding the TNS tables and initializes if empty |
||||
|
persistent TNSTables; |
||||
|
if isempty(TNSTables) |
||||
|
TNSTables = load('TableB219.mat'); |
||||
|
end |
||||
|
|
||||
|
if ~strcmp(frameType, 'ESH') |
||||
|
% Inverses MDCT coefficients filtering |
||||
|
frameFout = filter(1, [1 TNScoeffs], frameFin); |
||||
|
else |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
||||
|
|
||||
|
% Inverses MDCT coefficients filtering |
||||
|
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
||||
|
filter(1, [1 TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4)], subFrame); |
||||
|
end |
||||
|
end |
||||
|
end |
@ -0,0 +1,61 @@ |
|||||
|
|
||||
|
function y = imdct4(x) |
||||
|
% IMDCT4 Calculates the Modified Discrete Cosine Transform |
||||
|
% y = imdct4(x) |
||||
|
% |
||||
|
% x: input signal (can be either a column or frame per column) |
||||
|
% y: IMDCT of x |
||||
|
% |
||||
|
% Vectorize ! ! ! |
||||
|
|
||||
|
% ------- imdct4.m ----------------------------------------- |
||||
|
% Marios Athineos, marios@ee.columbia.edu |
||||
|
% http://www.ee.columbia.edu/~marios/ |
||||
|
% Copyright (c) 2002 by Columbia University. |
||||
|
% All rights reserved. |
||||
|
% ---------------------------------------------------------- |
||||
|
|
||||
|
[flen,fnum] = size(x); |
||||
|
% Make column if it's a single row |
||||
|
if (flen==1) |
||||
|
x = x(:); |
||||
|
flen = fnum; |
||||
|
fnum = 1; |
||||
|
end |
||||
|
|
||||
|
% We need these for furmulas below |
||||
|
N = flen; |
||||
|
M = N/2; |
||||
|
twoN = 2*N; |
||||
|
sqrtN = sqrt(twoN); |
||||
|
|
||||
|
% We need this twice so keep it around |
||||
|
t = (0:(M-1)).'; |
||||
|
w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN))); |
||||
|
|
||||
|
% Pre-twiddle |
||||
|
t = (0:(M-1)).'; |
||||
|
c = x(2*t+1,:) + j*x(N-1-2*t+1,:); |
||||
|
c = (0.5*w)*c; |
||||
|
|
||||
|
% FFT for N/2 points only !!! |
||||
|
c = fft(c,M); |
||||
|
|
||||
|
% Post-twiddle |
||||
|
c = ((8/sqrtN)*w)*c; |
||||
|
|
||||
|
% Preallocate rotation matrix |
||||
|
rot = zeros(twoN,fnum); |
||||
|
|
||||
|
% Sort |
||||
|
t = (0:(M-1)).'; |
||||
|
rot(2*t+1,:) = real(c(t+1,:)); |
||||
|
rot(N+2*t+1,:) = imag(c(t+1,:)); |
||||
|
t = (1:2:(twoN-1)).'; |
||||
|
rot(t+1,:) = -rot(twoN-1-t+1,:); |
||||
|
|
||||
|
% Shift |
||||
|
t = (0:(3*M-1)).'; |
||||
|
y(t+1,:) = rot(t+M+1,:); |
||||
|
t = (3*M:(twoN-1)).'; |
||||
|
y(t+1,:) = -rot(t-3*M+1,:); |
@ -0,0 +1,50 @@ |
|||||
|
function huffLUT = loadLUT() |
||||
|
load huffCodebooks |
||||
|
load huffCodebookSF |
||||
|
huffCodebooks{end + 1} = huffCodebookSF; |
||||
|
for i=1:12 |
||||
|
h=huffCodebooks{i}(:,3); |
||||
|
hlength=huffCodebooks{i}(:,2); |
||||
|
for j=1:length(h) |
||||
|
hbin{j}=dec2bin(h(j),hlength(j)); |
||||
|
end |
||||
|
invTable{i}=vlcTable(hbin); |
||||
|
clear hbin; |
||||
|
end |
||||
|
huffLUT{1}=struct('LUT',huffCodebooks{1},'invTable',invTable{1},'codebook',1,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); |
||||
|
huffLUT{2}=struct('LUT',huffCodebooks{2},'invTable',invTable{2},'codebook',2,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); |
||||
|
huffLUT{3}=struct('LUT',huffCodebooks{3},'invTable',invTable{3},'codebook',3,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); |
||||
|
huffLUT{4}=struct('LUT',huffCodebooks{4},'invTable',invTable{4},'codebook',4,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); |
||||
|
huffLUT{5}=struct('LUT',huffCodebooks{5},'invTable',invTable{5},'codebook',5,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); |
||||
|
huffLUT{6}=struct('LUT',huffCodebooks{6},'invTable',invTable{6},'codebook',6,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); |
||||
|
huffLUT{7}=struct('LUT',huffCodebooks{7},'invTable',invTable{7},'codebook',7,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); |
||||
|
huffLUT{8}=struct('LUT',huffCodebooks{8},'invTable',invTable{8},'codebook',8,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); |
||||
|
huffLUT{9}=struct('LUT',huffCodebooks{9},'invTable',invTable{9},'codebook',9,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); |
||||
|
huffLUT{10}=struct('LUT',huffCodebooks{10},'invTable',invTable{10},'codebook',10,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); |
||||
|
huffLUT{11}=struct('LUT',huffCodebooks{11},'invTable',invTable{11},'codebook',11,'nTupleSize',2,'maxAbsCodeVal',16,'signedValues',0); |
||||
|
huffLUT{12}=struct('LUT',huffCodebooks{12},'invTable',invTable{12},'codebook',12,'nTupleSize',1,'maxAbsCodeVal',60,'signedValues',1); |
||||
|
|
||||
|
function [h]=vlcTable(codeArray) |
||||
|
% Generates the inverse variable length coding tree, stored in a table. It |
||||
|
% is used for decoding. |
||||
|
% |
||||
|
% codeArray: the input matrix, in the form of a cell array, with each cell |
||||
|
% containing codewords represented in string format (array of chars) |
||||
|
h=zeros(1,3); |
||||
|
for codeIndex=1:length(codeArray) |
||||
|
word=str2num([codeArray{codeIndex}]')'; |
||||
|
hIndex=1; |
||||
|
hLength=size(h,1); |
||||
|
for i=1:length(word) |
||||
|
k=word(i)+1; |
||||
|
if(~h(hIndex,k)) |
||||
|
h(hIndex,k)=hLength+1; |
||||
|
hIndex=hLength+1; |
||||
|
h(hIndex,1:2)=zeros(1,2); |
||||
|
hLength=hLength+1; |
||||
|
else |
||||
|
hIndex=h(hIndex,k); |
||||
|
end |
||||
|
end |
||||
|
h(hIndex,3)=codeIndex; |
||||
|
end |
@ -0,0 +1,75 @@ |
|||||
|
|
||||
|
function y = mdct4(x) |
||||
|
% MDCT4 Calculates the Modified Discrete Cosine Transform |
||||
|
% y = mdct4(x) |
||||
|
% |
||||
|
% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with |
||||
|
% 50% overlap for perfect TDAC reconstruction. |
||||
|
% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full |
||||
|
% matrix (N) of coefs is: yf = [y;-flipud(y)]; |
||||
|
% |
||||
|
% x: input signal (can be either a column or frame per column) |
||||
|
% length of x must be a integer multiple of 4 (each frame) |
||||
|
% y: MDCT of x (coefs are divided by sqrt(N)) |
||||
|
% |
||||
|
% Vectorize ! ! ! |
||||
|
|
||||
|
% ------- mdct4.m ------------------------------------------ |
||||
|
% Marios Athineos, marios@ee.columbia.edu |
||||
|
% http://www.ee.columbia.edu/~marios/ |
||||
|
% Copyright (c) 2002 by Columbia University. |
||||
|
% All rights reserved. |
||||
|
% ---------------------------------------------------------- |
||||
|
|
||||
|
[flen,fnum] = size(x); |
||||
|
% Make column if it's a single row |
||||
|
if (flen==1) |
||||
|
x = x(:); |
||||
|
flen = fnum; |
||||
|
fnum = 1; |
||||
|
end |
||||
|
% Make sure length is multiple of 4 |
||||
|
if (rem(flen,4)~=0) |
||||
|
error('MDCT4 defined for lengths multiple of four.'); |
||||
|
end |
||||
|
|
||||
|
% We need these for furmulas below |
||||
|
N = flen; % Length of window |
||||
|
M = N/2; % Number of coefficients |
||||
|
N4 = N/4; % Simplify the way eqs look |
||||
|
sqrtN = sqrt(N); |
||||
|
|
||||
|
% Preallocate rotation matrix |
||||
|
% It would be nice to be able to do it in-place but we cannot |
||||
|
% cause of the prerotation. |
||||
|
rot = zeros(flen,fnum); |
||||
|
|
||||
|
% Shift |
||||
|
t = (0:(N4-1)).'; |
||||
|
rot(t+1,:) = -x(t+3*N4+1,:); |
||||
|
t = (N4:(N-1)).'; |
||||
|
rot(t+1,:) = x(t-N4+1,:); |
||||
|
clear x; |
||||
|
|
||||
|
% We need this twice so keep it around |
||||
|
t = (0:(N4-1)).'; |
||||
|
w = diag(sparse(exp(-j*2*pi*(t+1/8)/N))); |
||||
|
|
||||
|
% Pre-twiddle |
||||
|
t = (0:(N4-1)).'; |
||||
|
c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))... |
||||
|
-j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:)); |
||||
|
% This is a really cool Matlab trick ;) |
||||
|
c = 0.5*w*c; |
||||
|
clear rot; |
||||
|
|
||||
|
% FFT for N/4 points only !!! |
||||
|
c = fft(c,N4); |
||||
|
|
||||
|
% Post-twiddle |
||||
|
c = (2/sqrtN)*w*c; |
||||
|
|
||||
|
% Sort |
||||
|
t = (0:(N4-1)).'; |
||||
|
y(2*t+1,:) = real(c(t+1,:)); |
||||
|
y(M-1-2*t+1,:) = -imag(c(t+1,:)); |
@ -0,0 +1,23 @@ |
|||||
|
function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) |
||||
|
%Implementation of Psychoacoustic Model |
||||
|
% Usage SMR = psycho(frameT, frameType, frameTprev1, frameTprev2), where: |
||||
|
% Inputs |
||||
|
% - frameT is a frame in the time domain, containing only one of the |
||||
|
% audio channels stored in a vector of length 2048 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - frameTprev1 is the previous frame in the time domain, containing |
||||
|
% only one of the audio channels stored in a vector of length 2048 |
||||
|
% - frameTprev2 is the frame before frameTprev1 in the time domain, |
||||
|
% containing only one of the audio channels stored in a vector of |
||||
|
% length 2048 |
||||
|
% |
||||
|
% Output |
||||
|
% - SMR is the signal to mask ratio array of dimensions 42X8 for |
||||
|
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise |
||||
|
|
||||
|
|
||||
|
end |
||||
|
|
Loading…
Reference in new issue