Semester assignment for the course "Multimedia systems and virtual reality" of THMMY in AUTH university.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
6.1 KiB

6 years ago
function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
%AACODER3 Implementation of AAC encoder
% Usage AACSeq3 = AACoder3(fNameIn, fnameAACoded), where:
% Inputs
% - fNameIn is the filename and path of the file to encode
% - fnameAACoded is the filename and path of the mat file that will
% be written after encoding
%
% Output
% - AACSeq3 is an array of structs containing K structs, where K is
% the number of computed frames. Every struct of the array consists
% of:
% * a frameType (character vector),
% * a winType,
% * chl.TNScoeffs which are the quantized TNS coefficients of
% this frame's left channel,
% * chr.TNScoeffs which are the quantized TNS coefficients of
% this frame's right channel,
% * chl.T which are the psychoacoustic thresholds of this frame's
% left channel,
% * chr.T which are the psychoacoustic thresholds of this frame's
% right channel,
% * chl.G which are the quantized global gains of this frame's
% left channel,
% * chr.G which are the quantized global gains of this frame's
% right channel,
% * chl.sfc which is the Huffman encoded sfc sequence of this
% frame's left channel,
% * chr.sfc which is the Huffman encoded sfc sequence of this
% frame's right channel,
% * chl.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's left channel,
% * chr.stream which is the Huffman encoded quantized MDCT
% sequence of this frame's right channel,
% * chl.codebook which is the Huffman codebook used for this
% frame's left channel
% * chr.codebook which is the Huffman codebook used for this
% frame's right channel
%
% Notes
% - The first two frames are not encoded (the psychoacoustic model is
% called with the two preceding frames, which do not exist for them),
% so the first two entries of AACSeq3 carry no data.
% - The array is also written to fnameAACoded under the variable name
% 'AACSeq3'.

    % Declares constant window type
    WINDOW_TYPE = 'SIN';

    % Declares constant number of bands for long windows
    LONG_WINDOW_NUMBER_OF_BANDS = 69;

    % Frames are 2048 samples long and start every 1024 samples
    FRAME_HOP = 1024;
    FRAME_LENGTH = 2048;

    % Declares persistent variable holding the TNS tables and initializes if empty
    persistent TNSTables;
    if isempty(TNSTables)
        TNSTables = load('TableB219.mat');
    end

    % Reads the audio file
    [originalAudioData, ~] = audioread(fNameIn);

    % Number of frames that fit in the signal. Fail with a clear message
    % instead of an obscure "index must be a positive integer" error.
    numSamples = size(originalAudioData, 1);
    numFrames = fix((numSamples - (FRAME_HOP + 1)) / FRAME_HOP);
    if numFrames < 1
        error('AACoder3:inputTooShort', ...
            'Audio file "%s" is too short to form a single frame.', fNameIn);
    end

    % Splits the audio in frames and determines the type of each frame.
    % frameTypes{k} holds the type of the frame starting at sample
    % (k - 1) * FRAME_HOP + 1; SSC is given the following frame and the
    % previous frame's type (its first argument is a placeholder).
    frameTypes = cell(1, numFrames);
    frameTypes{1} = 'OLS';
    for i = 1:numFrames - 2
        nextFrameStart = (i + 1) * FRAME_HOP + 1;
        nextFrameStop = nextFrameStart + FRAME_LENGTH - 1;
        frameTypes{i+1} = SSC(1, ...
            originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i});
    end

    % Assigns a type to the last frame (it has no following frame to inspect)
    if numFrames >= 2
        if any(strcmp(frameTypes{numFrames - 1}, {'LSS', 'ESH'}))
            frameTypes{numFrames} = 'ESH';
        else
            frameTypes{numFrames} = 'OLS';
        end
    end

    % Encodes audio file
    huffLUT = loadLUT();
    AACSeq3(numFrames) = struct;

    for i = 0:numFrames - 1
        frameType = frameTypes{i+1};
        currFrameStart = i * FRAME_HOP + 1;
        currFrameStop = currFrameStart + FRAME_LENGTH - 1;

        % Filterbank followed by per-channel TNS
        frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), ...
            frameType, WINDOW_TYPE);
        [frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameType);
        [frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameType);

        if i < 2
            % TODO: what happens on the first two frames?
            % There are no two previous frames to feed the psychoacoustic
            % model, so these frames are skipped and stay empty in AACSeq3.
            continue;
        end

        prev1FrameStart = (i - 1) * FRAME_HOP + 1;
        prev1FrameStop = prev1FrameStart + FRAME_LENGTH - 1;
        prev2FrameStart = (i - 2) * FRAME_HOP + 1;
        prev2FrameStop = prev2FrameStart + FRAME_LENGTH - 1;

        % Signal-to-mask ratios of each channel from the time-domain frames
        SMRL = psycho(...
            originalAudioData(currFrameStart:currFrameStop, 1), ...
            frameType, ...
            originalAudioData(prev1FrameStart:prev1FrameStop, 1), ...
            originalAudioData(prev2FrameStart:prev2FrameStop, 1));
        SMRR = psycho(...
            originalAudioData(currFrameStart:currFrameStop, 2), ...
            frameType, ...
            originalAudioData(prev1FrameStart:prev1FrameStop, 2), ...
            originalAudioData(prev2FrameStart:prev2FrameStop, 2));

        % Quantizes each channel with its own SMR. The previous version
        % passed both channels to both calls and was unreachable because
        % of a stray "continue".
        [SL, sfcL, GL] = AACquantizer(frameF(:, 1), frameType, SMRL);
        [SR, sfcR, GR] = AACquantizer(frameF(:, 2), frameType, SMRR);

        % Band energies of the MDCT coefficients, recomputed from scratch
        % for every frame so no state leaks between iterations.
        % NOTE(review): the long-window band table (B219a) is used for every
        % frame type; ESH frames presumably need the short-window table and
        % per-subframe handling - confirm against psycho/filterbank.
        bandEnergyL = zeros(LONG_WINDOW_NUMBER_OF_BANDS, 1);
        bandEnergyR = zeros(LONG_WINDOW_NUMBER_OF_BANDS, 1);
        for band = 1:LONG_WINDOW_NUMBER_OF_BANDS
            bandRange = TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1;
            bandEnergyL(band) = sumsqr(frameF(bandRange, 1));
            bandEnergyR(band) = sumsqr(frameF(bandRange, 2));
        end

        % Thresholds T = P / SMR. The energies are reshaped to the SMR's
        % orientation so the division stays element-wise instead of being
        % implicitly expanded to a matrix when one is a row and the other
        % a column.
        TL = reshape(bandEnergyL, size(SMRL)) ./ SMRL;
        TR = reshape(bandEnergyR, size(SMRR)) ./ SMRR;

        % Huffman coding of the quantized coefficients and of the scale
        % factors (the latter with codebook 12 forced)
        [streamL, huffcodebookL] = encodeHuff(SL, huffLUT);
        [streamR, huffcodebookR] = encodeHuff(SR, huffLUT);
        [sfcL, ~] = encodeHuff(sfcL, huffLUT, 12);
        [sfcR, ~] = encodeHuff(sfcR, huffLUT, 12);

        % Stores the encoded frame; brace indexing keeps frameType a
        % character vector rather than a 1x1 cell
        AACSeq3(i + 1).frameType = frameType;
        AACSeq3(i + 1).winType = WINDOW_TYPE;
        AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL;
        AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR;
        AACSeq3(i + 1).chl.T = TL;
        AACSeq3(i + 1).chr.T = TR;
        AACSeq3(i + 1).chl.G = GL;
        AACSeq3(i + 1).chr.G = GR;
        AACSeq3(i + 1).chl.sfc = sfcL;
        AACSeq3(i + 1).chr.sfc = sfcR;
        AACSeq3(i + 1).chl.stream = streamL;
        AACSeq3(i + 1).chr.stream = streamR;
        AACSeq3(i + 1).chl.codebook = huffcodebookL;
        AACSeq3(i + 1).chr.codebook = huffcodebookR;
    end

    % save expects the NAME of the variable, not its value
    save(fnameAACoded, 'AACSeq3');
end