You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
233 lines
11 KiB
233 lines
11 KiB
function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
%AACODER3 Implementation of the AAC encoder.
%
% Usage: AACSeq3 = AACoder3(fNameIn, fnameAACoded), where:
%
% Inputs
%   - fNameIn is the filename and path of the audio file to encode. The
%     file must contain at least two channels; the first two are encoded
%     as left and right.
%   - fnameAACoded is the filename and path of the mat file that will be
%     written after encoding.
%
% Output
%   - AACSeq3 is a K-by-1 array of structs, where K is the number of
%     computed frames. Every struct of the array consists of:
%       * frameType, the type of the frame (stored as a 1x1 cell),
%       * winType, the window type used by the filterbank,
%       * chl.TNScoeffs / chr.TNScoeffs, the quantized TNS coefficients
%         of this frame's left / right channel,
%       * chl.T / chr.T, the psychoacoustic thresholds of this frame's
%         left / right channel,
%       * chl.G / chr.G, the quantized global gains of this frame's
%         left / right channel,
%       * chl.sfc / chr.sfc, the Huffman encoded sfc sequence of this
%         frame's left / right channel,
%       * chl.stream / chr.stream, the Huffman encoded quantized MDCT
%         sequence of this frame's left / right channel,
%       * chl.codebook / chr.codebook, the Huffman codebook used for
%         this frame's left / right channel.
%
% Frames are 2048 samples long and overlap by 1024 samples. Frames that
% run past the end of the audio are zero-padded.

    % Frame geometry
    FRAME_LENGTH = 2048;
    HOP_LENGTH = 1024;
    SHORT_SUBFRAME_COUNT = 8;
    SHORT_SUBFRAME_LENGTH = 128;

    % Declares constant window type
    WINDOW_TYPE = 'KBD';

    % Declares constant numbers of bands for long and short windows
    LONG_WINDOW_NUMBER_OF_BANDS = 69;
    SHORT_WINDOW_NUMBER_OF_BANDS = 42;

    % Declares persistent variable holding the TNS tables and initializes
    % it if empty, so the mat file is loaded only once per session
    persistent TNSTables;
    if isempty(TNSTables)
        TNSTables = load('TableB219.mat');
    end

    % Reads the audio file
    [originalAudioData, ~] = audioread(fNameIn);
    numSamples = size(originalAudioData, 1);
    if numSamples == 0 || size(originalAudioData, 2) < 2
        error('AACoder3:invalidInput', ...
            'Input audio must be non-empty and have at least two channels.');
    end

    % Splits the audio in frames and determines the type of each frame.
    % The first frame is always 'OLS'; the type of frame i+1 is decided
    % by SSC from the samples of frame i+2 and the type of frame i.
    numFrames = ceil(numSamples / HOP_LENGTH);
    frameTypes = cell(numFrames, 1);
    frameTypes{1} = 'OLS';
    for i = 1:numFrames - 2
        nextFrameT = extractFrame(originalAudioData, ...
            (i + 1) * HOP_LENGTH + 1, FRAME_LENGTH);
        frameTypes{i + 1} = SSC(1, nextFrameT, frameTypes{i});
    end

    % Assigns a type to the last frame, which has no following frame to
    % look ahead to. A single-frame file keeps the initial 'OLS'.
    if numFrames > 1
        if any(strcmp(frameTypes{numFrames - 1}, {'LSS', 'ESH'}))
            frameTypes{numFrames} = 'ESH';
        else
            frameTypes{numFrames} = 'OLS';
        end
    end

    % Encodes audio file
    huffLUT = loadLUT();
    AACSeq3(numFrames, 1) = struct;
    silentFrame = zeros(FRAME_LENGTH, 2);
    for i = 0:numFrames - 1
        frameType = frameTypes{i + 1};

        % Current frame in the time domain (zero-padded at the end of the
        % audio) and its MDCT coefficients
        currFrameT = extractFrame(originalAudioData, ...
            i * HOP_LENGTH + 1, FRAME_LENGTH);
        frameF = filterbank(currFrameT, frameType, WINDOW_TYPE);

        % Temporal noise shaping, per channel
        [frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameType);
        [frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameType);

        % The psychoacoustic model needs the two previous frames; frames
        % before the start of the audio are replaced by silence
        if i >= 1
            prev1FrameT = extractFrame(originalAudioData, ...
                (i - 1) * HOP_LENGTH + 1, FRAME_LENGTH);
        else
            prev1FrameT = silentFrame;
        end
        if i >= 2
            prev2FrameT = extractFrame(originalAudioData, ...
                (i - 2) * HOP_LENGTH + 1, FRAME_LENGTH);
        else
            prev2FrameT = silentFrame;
        end
        SMRL = psycho(currFrameT(:, 1), frameType, ...
            prev1FrameT(:, 1), prev2FrameT(:, 1));
        SMRR = psycho(currFrameT(:, 2), frameType, ...
            prev1FrameT(:, 2), prev2FrameT(:, 2));

        % Quantization
        [SL, sfcL, GL] = AACquantizer(frameF(:, 1), frameType, SMRL);
        [SR, sfcR, GR] = AACquantizer(frameF(:, 2), frameType, SMRR);

        % Thresholds: band energy of each channel's own MDCT coefficients
        % divided by that channel's SMR
        if ~strcmp(frameType, 'ESH')
            TL = bandThresholds(frameF(:, 1), TNSTables.B219a, SMRL, ...
                LONG_WINDOW_NUMBER_OF_BANDS, 1, size(frameF, 1));
            TR = bandThresholds(frameF(:, 2), TNSTables.B219a, SMRR, ...
                LONG_WINDOW_NUMBER_OF_BANDS, 1, size(frameF, 1));
        else
            TL = bandThresholds(frameF(:, 1), TNSTables.B219b, SMRL, ...
                SHORT_WINDOW_NUMBER_OF_BANDS, SHORT_SUBFRAME_COUNT, ...
                SHORT_SUBFRAME_LENGTH);
            TR = bandThresholds(frameF(:, 2), TNSTables.B219b, SMRR, ...
                SHORT_WINDOW_NUMBER_OF_BANDS, SHORT_SUBFRAME_COUNT, ...
                SHORT_SUBFRAME_LENGTH);
        end

        % Huffman encoding of the quantized MDCT coefficients
        [streamL, huffcodebookL] = encodeHuff(SL, huffLUT);
        [streamR, huffcodebookR] = encodeHuff(SR, huffLUT);

        % Huffman encoding of the scalefactors, skipping the first row.
        % NOTE(review): the third argument 12 presumably forces a
        % specific codebook - confirm against encodeHuff.
        if strcmp(frameType, 'ESH')
            % Flattens all columns of sfc into a single column vector
            [sfcHuffL, ~] = encodeHuff(reshape(sfcL(2:end, :), 1, [])', ...
                huffLUT, 12);
            [sfcHuffR, ~] = encodeHuff(reshape(sfcR(2:end, :), 1, [])', ...
                huffLUT, 12);
        else
            [sfcHuffL, ~] = encodeHuff(sfcL(2:end), huffLUT, 12);
            [sfcHuffR, ~] = encodeHuff(sfcR(2:end), huffLUT, 12);
        end

        % NOTE(review): frameType is stored as a 1x1 cell (parenthesis
        % indexing), kept as-is so existing readers of the saved file are
        % not affected.
        AACSeq3(i + 1).frameType = frameTypes(i + 1);
        AACSeq3(i + 1).winType = WINDOW_TYPE;
        AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL;
        AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR;
        AACSeq3(i + 1).chl.T = TL;
        AACSeq3(i + 1).chr.T = TR;
        AACSeq3(i + 1).chl.G = GL;
        AACSeq3(i + 1).chr.G = GR;
        AACSeq3(i + 1).chl.sfc = sfcHuffL;
        AACSeq3(i + 1).chr.sfc = sfcHuffR;
        AACSeq3(i + 1).chl.stream = streamL;
        AACSeq3(i + 1).chr.stream = streamR;
        AACSeq3(i + 1).chl.codebook = huffcodebookL;
        AACSeq3(i + 1).chr.codebook = huffcodebookR;
    end

    save(fnameAACoded, 'AACSeq3');
end

function frame = extractFrame(audioData, frameStart, frameLength)
%EXTRACTFRAME Returns frameLength rows of audioData starting at row
% frameStart (all columns), zero-padded at the end when the audio is
% shorter than the requested span.
    frameStop = min(frameStart + frameLength - 1, size(audioData, 1));
    available = frameStop - frameStart + 1;
    frame = zeros(frameLength, size(audioData, 2), class(audioData));
    if available > 0
        frame(1:available, :) = audioData(frameStart:frameStop, :);
    end
end

function T = bandThresholds(coeffs, bandTable, SMR, numBands, numSubFrames, subFrameLength)
%BANDTHRESHOLDS Returns a numBands-by-numSubFrames matrix holding, for
% every band of every subframe, the sum of squared coefficients of the
% band divided by the band's SMR. Columns 2 and 3 of bandTable hold the
% zero-based first and last coefficient index of each band.
    T = zeros(numBands, numSubFrames);
    for subFrameIndex = 1:numSubFrames
        offset = (subFrameIndex - 1) * subFrameLength;
        for band = 1:numBands
            wLow = bandTable(band, 2) + 1;
            wHigh = bandTable(band, 3) + 1;
            bandEnergy = sumsqr(coeffs(offset + (wLow:wHigh)));
            % NOTE(review): psycho is assumed to return one SMR column
            % per subframe for short windows - confirm. A vector SMR is
            % indexed by band only, as before.
            if isvector(SMR)
                bandSMR = SMR(band);
            else
                bandSMR = SMR(band, subFrameIndex);
            end
            T(band, subFrameIndex) = bandEnergy ./ bandSMR;
        end
    end
end
|
|
|