Apostolos Fanakis
6 years ago
14 changed files with 771 additions and 1 deletions
@ -0,0 +1,65 @@ |
|||||
|
function AACSeq2 = AACoder2(fNameIn) |
||||
|
%Implementation of WHAT?? //TODO!! |
||||
|
% Usage AACSeq2 = AACoder2(fNameIn), where: |
||||
|
% Inputs |
||||
|
% - fNameIn is the filename and path of the file to encode |
||||
|
% |
||||
|
% Output |
||||
|
% - AACSeq2 is an array of structs containing K structs, where K is |
||||
|
% the number of computed frames. Every struct of the array consists |
||||
|
% of a frameType, a winType, chl.TNScoeffs which are the quantized TNS |
||||
|
% coefficients of this frame's left channel, chr.TNScoeffs which are |
||||
|
% the quantized TNS coefficients of this frame's right channel, |
||||
|
% chl.frameF which are the MDCT coefficients of this frame's left |
||||
|
% channel, chr.frameF which are the MDCT coefficients of this frame's |
||||
|
% right channel |
||||
|
|
||||
|
% Reads the audio file |
||||
|
[originalAudioData, ~] = audioread(fNameIn); |
||||
|
|
||||
|
% Splits the audio in frames and determines the type of each frame |
||||
|
frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS'; |
||||
|
frameTypes{1} = 'OLS'; |
||||
|
for i = 1:length(frameTypes) - 2 |
||||
|
nextFrameStart = (i + 1) * 1024 + 1; |
||||
|
nextFrameStop = nextFrameStart + 2047; |
||||
|
frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i}); |
||||
|
end |
||||
|
|
||||
|
% Assignes a type to the last frame |
||||
|
if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS') |
||||
|
frameTypes{length(frameTypes)} = 'ESH'; |
||||
|
elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH') |
||||
|
frameTypes{length(frameTypes)} = 'ESH'; |
||||
|
else |
||||
|
frameTypes{length(frameTypes)} = 'OLS'; |
||||
|
end |
||||
|
|
||||
|
% Encodes audio file |
||||
|
AACSeq2(length(frameTypes)) = struct; |
||||
|
for i = 0:length(frameTypes) - 1 |
||||
|
currFrameStart = i * 1024 + 1; |
||||
|
currFrameStop = currFrameStart + 2047; |
||||
|
frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, 'KBD'); |
||||
|
[frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameTypes{i+1}); |
||||
|
[frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameTypes{i+1}); |
||||
|
|
||||
|
AACSeq2(i + 1).frameType = frameTypes(i + 1); |
||||
|
AACSeq2(i + 1).winType = 'KBD'; |
||||
|
AACSeq2(i + 1).chl.TNScoeffs = TNScoeffsL; |
||||
|
AACSeq2(i + 1).chr.TNScoeffs = TNScoeffsR; |
||||
|
AACSeq2(i + 1).chl.frameF = frameF(:, 1); |
||||
|
AACSeq2(i + 1).chr.frameF = frameF(:, 2); |
||||
|
end |
||||
|
|
||||
|
if true |
||||
|
[idx,label] = grp2idx(sort(frameTypes)); |
||||
|
hist(idx,unique(idx)); |
||||
|
set(gca,'xTickLabel',label) |
||||
|
|
||||
|
sum(idx(:) == 1) |
||||
|
sum(idx(:) == 2) |
||||
|
sum(idx(:) == 3) |
||||
|
sum(idx(:) == 4) |
||||
|
end |
||||
|
end |
Binary file not shown.
@ -0,0 +1,85 @@ |
|||||
|
function frameType = SSC(~, nextFrameT, prevFrameType) |
||||
|
%Implementation of the Sequence Segmentation Control step |
||||
|
% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where: |
||||
|
% Inputs |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
% - nextFrameT is the next frame in the time domain, containing both |
||||
|
% channels of the audio stored in an array of dimensions 2048X2 |
||||
|
% - prevFrameType is the type of the previous frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% |
||||
|
% Output |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
|
||||
|
% Examines the cases where the determination of the type of the next |
||||
|
% frame isn't needed for |
||||
|
if strcmp(prevFrameType, 'LSS') |
||||
|
frameType = 'ESH'; |
||||
|
return; |
||||
|
elseif strcmp(prevFrameType, 'LPS') |
||||
|
frameType = 'OLS'; |
||||
|
return; |
||||
|
end |
||||
|
|
||||
|
% Determines the type of the next frame |
||||
|
% Filters frame |
||||
|
nextFrameT = filter([0.7548, -0.7548], [1, -0.5095], nextFrameT, [], 1); |
||||
|
|
||||
|
channelFrameType = {'nan', 'nan'}; |
||||
|
|
||||
|
for channel = 1:2 |
||||
|
% Calculates sub-frame energy estimation |
||||
|
[subFrames, ~] = buffer(nextFrameT(449:end - 448, channel), 256, 128, 'nodelay'); |
||||
|
energyEstimations = sum(subFrames .^ 2, 1); |
||||
|
|
||||
|
% Calculates the ratio of the sub-frame energy to the average energy of |
||||
|
% the previous sub-frames |
||||
|
nextIsESH = false; |
||||
|
for subFrameIndex = 2:8 |
||||
|
energyRatio = energyEstimations(subFrameIndex) / ... |
||||
|
mean(energyEstimations(1:subFrameIndex - 1)); |
||||
|
|
||||
|
if (energyEstimations(subFrameIndex) > 10^(-3)) && (energyRatio > 10) |
||||
|
nextIsESH = true; |
||||
|
break; |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
if nextIsESH == true |
||||
|
if strcmp(prevFrameType, 'ESH') |
||||
|
% This frame of this channel is an EIGHT_SHORT_SEQUENCE type |
||||
|
% frame. This means the frames of both channels will be encoded |
||||
|
% as EIGHT_SHORT_SEQUENCE type frames. |
||||
|
frameType = 'ESH'; |
||||
|
return; |
||||
|
elseif strcmp(prevFrameType, 'OLS') |
||||
|
channelFrameType{channel} = 'LSS'; |
||||
|
end |
||||
|
else |
||||
|
if strcmp(prevFrameType, 'ESH') |
||||
|
channelFrameType{channel} = 'LPS'; |
||||
|
elseif strcmp(prevFrameType, 'OLS') |
||||
|
channelFrameType{channel} = 'OLS'; |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
if strcmp(channelFrameType{1}, 'nan') || strcmp(channelFrameType{2}, 'nan') |
||||
|
error('SSC, l[73]: Internal error occured!') |
||||
|
end |
||||
|
if strcmp(channelFrameType{1}, 'OLS') |
||||
|
frameType = channelFrameType{2}; |
||||
|
elseif strcmp(channelFrameType{2}, 'OLS') |
||||
|
frameType = channelFrameType{1}; |
||||
|
elseif strcmp(channelFrameType{1}, channelFrameType{2}) |
||||
|
frameType = channelFrameType{1}; |
||||
|
else |
||||
|
frameType = 'ESH'; |
||||
|
end |
||||
|
end |
@ -0,0 +1,172 @@ |
|||||
|
function [frameFout, TNScoeffs] = TNS(frameFin, frameType) |
||||
|
%Implementation of the TNS step |
||||
|
% Usage [frameFout, TNScoeffs] = TNS(frameFin, frameType), where: |
||||
|
% Inputs |
||||
|
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% |
||||
|
% Output |
||||
|
% - frameFout is the frame in the frequency domain after Temporal Noise |
||||
|
% Shaping, in MDCT coefficients representation containing both channels |
||||
|
% of the audio stored in an array of dimensions 1024X2 |
||||
|
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
||||
|
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
||||
|
|
||||
|
% Declares constant numbers of bands for long and short windows |
||||
|
LONG_WINDOW_NUMBER_OF_BANDS = 69; |
||||
|
SHORT_WINDOW_NUMBER_OF_BANDS = 42; |
||||
|
% Declares constant order of the linear prediction filter |
||||
|
LPF_ORDER = 4; |
||||
|
% Declares constant coefficients' resolution |
||||
|
COEF_RES = 4; |
||||
|
|
||||
|
% Declares persistent variable holding the TNS tables and initializes if empty |
||||
|
persistent TNSTables; |
||||
|
if isempty(TNSTables) |
||||
|
TNSTables = load('TableB219.mat'); |
||||
|
end |
||||
|
|
||||
|
if ~strcmp(frameType, 'ESH') |
||||
|
% Calculates the energy per band |
||||
|
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0; |
||||
|
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS - 1 |
||||
|
bandEnergy(band) = sumsqr(frameFin(TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band + 1, 2))); |
||||
|
end |
||||
|
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = sumsqr( ... |
||||
|
frameFin(TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 2) + 1:... |
||||
|
TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 3) + 1)); |
||||
|
|
||||
|
% Calculates the normalization factors |
||||
|
|
||||
|
% PROBABLY WRONG, THE ONE BELLOW SHOULD BE RIGHT AND BETTER |
||||
|
% normalizationFactor(1024) = 0; |
||||
|
% for normIndex = 1:1024 |
||||
|
% bandIndices = find(TNSTables.B219a(:, 2) >= (normIndex - 1), 1); |
||||
|
% if ~isempty(bandIndices) |
||||
|
% normalizationFactor(normIndex) = sqrt(bandEnergy(bandIndices)); |
||||
|
% else |
||||
|
% normalizationFactor(normIndex) = sqrt(bandEnergy(find(TNSTables.B219a(:, 3) >= normIndex - 1, 1))); |
||||
|
% end |
||||
|
% end |
||||
|
|
||||
|
bandIndices = quantiz(0:1023, TNSTables.B219a(:, 2) - 1); |
||||
|
normalizationFactor = sqrt(bandEnergy(bandIndices)); |
||||
|
|
||||
|
% Smooths normalization factors |
||||
|
for normIndex = 1023:-1:1 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
||||
|
end |
||||
|
for normIndex = 2:1024 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
||||
|
end |
||||
|
|
||||
|
% Normalizes MDCT coefficients according to each band energy |
||||
|
normalizedFrameFin = frameFin ./ normalizationFactor'; |
||||
|
|
||||
|
% Calculates the linear prediction coefficients |
||||
|
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
||||
|
|
||||
|
% Quantizes these coefficients |
||||
|
quantizedLinPredCoeff(LPF_ORDER) = 0; |
||||
|
for coeffIndex = 2:length(linPredCoeff) |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = asin(linPredCoeff(coeffIndex)); |
||||
|
if linPredCoeff(coeffIndex) >= 0 |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = round(quantizedLinPredCoeff(coeffIndex - 1) * ... |
||||
|
(bitshift(1, COEF_RES - 1) - 0.5) / (pi / 2)); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = quantizedLinPredCoeff(coeffIndex - 1) / ... |
||||
|
(bitshift(1, COEF_RES - 1) - 0.5) / (pi / 2); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = sin (quantizedLinPredCoeff(coeffIndex - 1)); |
||||
|
else |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = round(quantizedLinPredCoeff(coeffIndex - 1) * ... |
||||
|
(bitshift(1, COEF_RES - 1) + 0.5) / (pi / 2)); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = quantizedLinPredCoeff(coeffIndex - 1) / ... |
||||
|
(bitshift(1, COEF_RES - 1) + 0.5) / (pi / 2); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = sin (quantizedLinPredCoeff(coeffIndex - 1)); |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
% Filters MDCT coefficients |
||||
|
if ~isstable(1, [1 (quantizedLinPredCoeff * (-1))]) |
||||
|
error('TNS, l[79]: Inverse filter not stable!'); |
||||
|
else |
||||
|
TNScoeffs = quantizedLinPredCoeff; |
||||
|
frameFout = filter([1 (quantizedLinPredCoeff * (-1))], 1, frameFin); |
||||
|
end |
||||
|
else |
||||
|
% Initializes output vectors |
||||
|
TNScoeffs(LPF_ORDER * 8) = 0; |
||||
|
frameFout(1024) = 0; |
||||
|
|
||||
|
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = 0; |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
||||
|
|
||||
|
% Calculates the energy per band |
||||
|
for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS - 1 |
||||
|
bandEnergy(band) = sumsqr(subFrame(TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band + 1, 2))); |
||||
|
end |
||||
|
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = sumsqr( ... |
||||
|
subFrame(TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 2) + 1:... |
||||
|
TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 3) + 1)); |
||||
|
|
||||
|
% Calculates the normalization factors |
||||
|
normalizationFactor(128) = 0; |
||||
|
for normIndex = 1:128 |
||||
|
bandIndices = find(TNSTables.B219b(:, 2) >= normIndex - 1, 1); |
||||
|
if ~isempty(bandIndices) |
||||
|
normalizationFactor(normIndex) = sqrt(bandEnergy(bandIndices)); |
||||
|
else |
||||
|
normalizationFactor(normIndex) = sqrt(bandEnergy(find(TNSTables.B219b(:, 3) <= normIndex - 1, 1))); |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
% Smooths normalization factors |
||||
|
for normIndex = 127:-1:1 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
||||
|
end |
||||
|
for normIndex = 2:128 |
||||
|
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
||||
|
end |
||||
|
|
||||
|
% Normalizes MDCT coefficients according to each band energy |
||||
|
normalizedFrameFin = subFrame ./ normalizationFactor'; |
||||
|
|
||||
|
% Calculates the linear prediction coefficients |
||||
|
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
||||
|
|
||||
|
% Quantizes these coefficients |
||||
|
quantizedLinPredCoeff(LPF_ORDER) = 0; |
||||
|
for coeffIndex = 1:length(linPredCoeff) |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = asin(linPredCoeff(coeffIndex)); |
||||
|
if linPredCoeff(coeffIndex) >= 0 |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = round(quantizedLinPredCoeff(coeffIndex - 1) * ... |
||||
|
(bitshift(1, COEF_RES - 1) - 0.5) / (pi / 2)); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = quantizedLinPredCoeff(coeffIndex - 1) / ... |
||||
|
(bitshift(1, COEF_RES - 1) - 0.5) / (pi / 2); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = sin (quantizedLinPredCoeff(coeffIndex - 1)); |
||||
|
else |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = round(quantizedLinPredCoeff(coeffIndex - 1) * ... |
||||
|
(bitshift(1, COEF_RES - 1) + 0.5) / (pi / 2)); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = quantizedLinPredCoeff(coeffIndex - 1) / ... |
||||
|
(bitshift(1, COEF_RES - 1) + 0.5) / (pi / 2); |
||||
|
quantizedLinPredCoeff(coeffIndex - 1) = sin (quantizedLinPredCoeff(coeffIndex - 1)); |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
% Filters MDCT coefficients |
||||
|
if ~isstable(1, [1 (quantizedLinPredCoeff * (-1))]) |
||||
|
error('TNS, l[79]: Inverse filter not stable!'); |
||||
|
else |
||||
|
TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4) = quantizedLinPredCoeff; |
||||
|
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
||||
|
filter([1 (quantizedLinPredCoeff * (-1))], 1, subFrame); |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,27 @@ |
|||||
|
function SNR = demoAAC2(fNameIn, fNameOut) |
||||
|
%Implementation of WHAT?? //TODO!! |
||||
|
% Usage SNR = demoAAC2(fNameIn, fNameOut), where: |
||||
|
% Inputs |
||||
|
% - fNameIn is the filename and path of the file to encode |
||||
|
% - fNameOut is the filename and path of the file that will be |
||||
|
% written after decoding |
||||
|
% |
||||
|
% Output |
||||
|
% - SNR is the signal to noise ration computed after successively |
||||
|
% encoding and decoding the audio signal |
||||
|
|
||||
|
AACSeq2 = AACoder2(fNameIn); |
||||
|
decodedAudio = iAACoder2(AACSeq2, fNameOut); |
||||
|
|
||||
|
[audioData, ~] = audioread(fNameIn); |
||||
|
|
||||
|
figure() |
||||
|
plot(audioData(1:length(decodedAudio), 1) - decodedAudio(1:end, 1)) |
||||
|
figure() |
||||
|
plot(audioData(1:length(decodedAudio), 2) - decodedAudio(1:end, 2)) |
||||
|
|
||||
|
snr(audioData(1:length(decodedAudio), 1), audioData(1:length(decodedAudio), 1) - decodedAudio(1:end, 1)) |
||||
|
snr(audioData(1:length(decodedAudio), 2), audioData(1:length(decodedAudio), 2) - decodedAudio(1:end, 2)) |
||||
|
SNR = 10*log10((sum(audioData(1:length(decodedAudio), :)) .^ 2) ./ ... |
||||
|
(sum(audioData(1:length(decodedAudio), :) - decodedAudio) .^ 2)); |
||||
|
end |
@ -0,0 +1,103 @@ |
|||||
|
function frameF = filterbank(frameT, frameType, winType) |
||||
|
%Implementation of the Filter Bank step |
||||
|
% Usage frameF = filterbank(frameT, frameType, winType), where: |
||||
|
% Inputs |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - winType is the type of the window selected, can be one of "KBD", |
||||
|
% "SIN" |
||||
|
% |
||||
|
% Output |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
|
||||
|
% Declares persistent windows variables and initializes if empty |
||||
|
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
||||
|
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
||||
|
isempty(sinWindowLong) || isempty(sinWindowShort) |
||||
|
kaiserLong = kaiser(1024, 6*pi); |
||||
|
kaiserSumLong = sum(kaiserLong); |
||||
|
kaiserShort = kaiser(128, 4*pi); |
||||
|
kaiserSumShort = sum(kaiserShort); |
||||
|
|
||||
|
for n = 1:1024 |
||||
|
kaiserWindowLong(n) = sqrt(sum(kaiserLong(1:n))/kaiserSumLong); |
||||
|
kaiserWindowLong(1024 + n) = sqrt(sum(kaiserLong(1:end-n+1))/kaiserSumLong); |
||||
|
|
||||
|
sinWindowLong(n) = sin(pi*(n + 0.5)/2048); |
||||
|
sinWindowLong(1024 + n) = sin(pi*(1024 + n + 0.5)/2048); |
||||
|
|
||||
|
end |
||||
|
|
||||
|
for n = 1:128 |
||||
|
kaiserWindowShort(n) = sqrt(sum(kaiserShort(1:n))/kaiserSumShort); |
||||
|
kaiserWindowShort(128 + n) = sqrt(sum(kaiserShort(1:end-n+1))/kaiserSumShort); |
||||
|
|
||||
|
sinWindowShort(n) = sin(pi*(n + 0.5)/256); |
||||
|
sinWindowShort(128 + n) = sin(pi*(128 + n + 0.5)/256); |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
frameF(1024, 2) = 0; |
||||
|
|
||||
|
% Applies appropriate window to the frame |
||||
|
for channel=1:2 |
||||
|
if strcmp(frameType, 'OLS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(:, channel) = frameT(:, channel) .* kaiserWindowLong(:); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(:, channel) = frameT(:, channel) .* sinWindowLong(:); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF(:, channel) = mdct4(frameT(:, channel)); |
||||
|
elseif strcmp(frameType, 'LSS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:1024, channel) = frameT(1:1024, channel) .* kaiserWindowLong(1:1024)'; |
||||
|
frameT(1473:1600, channel) = frameT(1473:1600, channel) .* kaiserWindowShort(129:end)'; |
||||
|
frameT(1601:end, channel) = 0; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:1024, channel) = frameT(1:1024, channel) .* sinWindowLong(1:1024)'; |
||||
|
frameT(1473:1600, channel) = frameT(1473:1600, channel) .* sinWindowShort(129:end)'; |
||||
|
frameT(1601:end, channel) = 0; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF(:, channel) = mdct4(frameT(:, channel)); |
||||
|
elseif strcmp(frameType, 'LPS') |
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:448, channel) = 0; |
||||
|
frameT(449:576, channel) = frameT(449:576, channel) .* kaiserWindowShort(1:128)'; |
||||
|
frameT(1025:end, channel) = frameT(1025:end, channel) .* kaiserWindowLong(1025:end)'; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:448, channel) = 0; |
||||
|
frameT(449:576, channel) = frameT(449:576, channel) .* sinWindowShort(1:128)'; |
||||
|
frameT(1025:end, channel) = frameT(1025:end, channel) .* sinWindowLong(1025:end)'; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
|
||||
|
frameF(:, channel) = mdct4(frameT(:, channel)); |
||||
|
elseif strcmp(frameType, 'ESH') |
||||
|
% Splits the frame into sub-frames |
||||
|
[subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay'); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
subFrames = subFrames .* repmat(sinWindowShort', [1 8]); |
||||
|
end |
||||
|
|
||||
|
for subFrameIndex = 1:8 |
||||
|
frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = mdct4(subFrames(:, subFrameIndex)); |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
@ -0,0 +1,41 @@ |
|||||
|
function x = iAACoder2(AACSeq2, fNameOut) |
||||
|
%Implementation of WHAT?? //TODO!! |
||||
|
% Usage x = iAACoder2(AACSeq2, fNameOut), where: |
||||
|
% Inputs |
||||
|
% - fNameOut is the filename and path of the file that will be |
||||
|
% written after decoding |
||||
|
% - AACSeq2 is an array of structs containing K structs, where K is |
||||
|
% the number of computed frames. Every struct of the array consists |
||||
|
% of a frameType, a winType, chl.TNScoeffs which are the quantized TNS |
||||
|
% coefficients of this frame's left channel, chr.TNScoeffs which are |
||||
|
% the quantized TNS coefficients of this frame's right channel, |
||||
|
% chl.frameF which are the MDCT coefficients of this frame's left |
||||
|
% channel, chr.frameF which are the MDCT coefficients of this frame's |
||||
|
% right channel |
||||
|
% |
||||
|
% Output |
||||
|
% - x is an array containing the decoded audio samples |
||||
|
|
||||
|
% Initializes an array to hold the decoded samples |
||||
|
decodedAudio(1024 * (length(AACSeq2) + 1), 2) = 0; |
||||
|
% Initializes an array to hold both audio channels |
||||
|
frameF(1024, 2) = 0; |
||||
|
|
||||
|
% Decodes audio file |
||||
|
for i = 0:length(AACSeq2)-1 |
||||
|
currFrameStart = i * 1024 + 1; |
||||
|
currFrameStop = currFrameStart + 2047; |
||||
|
frameF(:, 1) = AACSeq2(i+1).chl.frameF; |
||||
|
frameF(:, 2) = AACSeq2(i+1).chr.frameF; |
||||
|
TNScoeffsL = AACSeq2(i + 1).chl.TNScoeffs; |
||||
|
TNScoeffsR = AACSeq2(i + 1).chr.TNScoeffs; |
||||
|
frameF(:, 1) = iTNS(frameF(:, 1), AACSeq2(i+1).frameType, TNScoeffsL); |
||||
|
frameF(:, 2) = iTNS(frameF(:, 2), AACSeq2(i+1).frameType, TNScoeffsR); |
||||
|
frameT = iFilterbank(frameF, AACSeq2(i+1).frameType, AACSeq2(i+1).winType); |
||||
|
|
||||
|
decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT; |
||||
|
end |
||||
|
|
||||
|
audiowrite(fNameOut, decodedAudio, 48000); |
||||
|
x = decodedAudio; |
||||
|
end |
@ -0,0 +1,103 @@ |
|||||
|
function frameT = iFilterbank(frameF, frameType, winType) |
||||
|
%Implementation of the Inverse Filter Bank step |
||||
|
% Usage frameT = iFilterbank(frameF, frameType, winType), where: |
||||
|
% Inputs |
||||
|
% - frameF is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - winType is the type of the window selected, can be one of "KBD", |
||||
|
% "SIN" |
||||
|
% |
||||
|
% Output |
||||
|
% - frameT is a frame in the time domain, containing both channels of |
||||
|
% the audio stored in an array of dimensions 2048X2 |
||||
|
|
||||
|
% Declares persistent windows variables and initializes if empty |
||||
|
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
||||
|
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
||||
|
isempty(sinWindowLong) || isempty(sinWindowShort) |
||||
|
kaiserLong = kaiser(1024, 6*pi); |
||||
|
kaiserSumLong = sum(kaiserLong); |
||||
|
kaiserShort = kaiser(128, 4*pi); |
||||
|
kaiserSumShort = sum(kaiserShort); |
||||
|
|
||||
|
for n = 1:1024 |
||||
|
kaiserWindowLong(n) = sqrt(sum(kaiserLong(1:n))/kaiserSumLong); |
||||
|
kaiserWindowLong(1024 + n) = sqrt(sum(kaiserLong(1:end-n+1))/kaiserSumLong); |
||||
|
|
||||
|
sinWindowLong(n) = sin(pi*(n + 0.5)/2048); |
||||
|
sinWindowLong(1024 + n) = sin(pi*(1024 + n + 0.5)/2048); |
||||
|
|
||||
|
end |
||||
|
|
||||
|
for n = 1:128 |
||||
|
kaiserWindowShort(n) = sqrt(sum(kaiserShort(1:n))/kaiserSumShort); |
||||
|
kaiserWindowShort(128 + n) = sqrt(sum(kaiserShort(1:end-n+1))/kaiserSumShort); |
||||
|
|
||||
|
sinWindowShort(n) = sin(pi*(n + 0.5)/256); |
||||
|
sinWindowShort(128 + n) = sin(pi*(128 + n + 0.5)/256); |
||||
|
end |
||||
|
end |
||||
|
|
||||
|
frameT(2048, 2) = 0; |
||||
|
|
||||
|
% Applies appropriate window to the frame |
||||
|
for channel=1:2 |
||||
|
if strcmp(frameType, 'OLS') |
||||
|
frameT(:, channel) = imdct4(frameF(:, channel)); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(:, channel) = frameT(:, channel) .* kaiserWindowLong(:); |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(:, channel) = frameT(:, channel) .* sinWindowLong(:); |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'LSS') |
||||
|
frameT(:, channel) = imdct4(frameF(:, channel)); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:1024, channel) = frameT(1:1024, channel) .* kaiserWindowLong(1:1024)'; |
||||
|
frameT(1473:1600, channel) = frameT(1473:1600, channel) .* kaiserWindowShort(129:end)'; |
||||
|
frameT(1601:end, channel) = 0; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:1024, channel) = frameT(1:1024, channel) .* sinWindowLong(1:1024)'; |
||||
|
frameT(1473:1600, channel) = frameT(1473:1600, channel) .* sinWindowShort(129:end)'; |
||||
|
frameT(1601:end, channel) = 0; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'LPS') |
||||
|
frameT(:, channel) = imdct4(frameF(:, channel)); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
frameT(1:448, channel) = 0; |
||||
|
frameT(449:576, channel) = frameT(449:576, channel) .* kaiserWindowShort(1:128)'; |
||||
|
frameT(1025:end, channel) = frameT(1025:end, channel) .* kaiserWindowLong(1025:end)'; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
frameT(1:448, channel) = 0; |
||||
|
frameT(449:576, channel) = frameT(449:576, channel) .* sinWindowShort(1:128)'; |
||||
|
frameT(1025:end, channel) = frameT(1025:end, channel) .* sinWindowLong(1025:end)'; |
||||
|
else |
||||
|
error('filterbank, l[20]: Unsupported window type input!') |
||||
|
end |
||||
|
elseif strcmp(frameType, 'ESH') |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel)); |
||||
|
|
||||
|
if strcmp(winType, 'KBD') |
||||
|
subFrame = subFrame .* kaiserWindowShort'; |
||||
|
elseif strcmp(winType, 'SIN') |
||||
|
subFrame = subFrame .* sinWindowShort'; |
||||
|
end |
||||
|
|
||||
|
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) = ... |
||||
|
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) + subFrame; |
||||
|
end |
||||
|
end |
||||
|
end |
||||
|
end |
@ -0,0 +1,38 @@ |
|||||
|
function frameFout = iTNS(frameFin, frameType, TNScoeffs) |
||||
|
%Implementation of the reverse TNS step |
||||
|
% Usage frameFout = iTNS(frameFin, frameType, TNScoeffs), where: |
||||
|
% Inputs |
||||
|
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
||||
|
% representation containing both channels of the audio stored in an |
||||
|
% array of dimensions 1024X2 |
||||
|
% - frameType is the type of the current frame in string |
||||
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
||||
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
||||
|
% (LONG_STOP_SEQUENCE) |
||||
|
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
||||
|
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
||||
|
% |
||||
|
% Output |
||||
|
% - frameFout is the frame in the frequency domain after Temporal Noise |
||||
|
% Shaping, in MDCT coefficients representation containing both channels |
||||
|
% of the audio stored in an array of dimensions 1024X2 |
||||
|
|
||||
|
% Declares persistent variable holding the TNS tables and initializes if empty |
||||
|
persistent TNSTables; |
||||
|
if isempty(TNSTables) |
||||
|
TNSTables = load('TableB219.mat'); |
||||
|
end |
||||
|
|
||||
|
if ~strcmp(frameType, 'ESH') |
||||
|
% Inverses MDCT coefficients filtering |
||||
|
frameFout = filter(1, [1 (-1 * TNScoeffs)], frameFin); |
||||
|
else |
||||
|
for subFrameIndex = 1:8 |
||||
|
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
||||
|
|
||||
|
% Inverses MDCT coefficients filtering |
||||
|
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
||||
|
filter(1, [1 (-1 * TNScoeffs)], subFrame); |
||||
|
end |
||||
|
end |
||||
|
end |
@ -0,0 +1,61 @@ |
|||||
|
|
||||
|
function y = imdct4(x) |
||||
|
% IMDCT4 Calculates the Modified Discrete Cosine Transform |
||||
|
% y = imdct4(x) |
||||
|
% |
||||
|
% x: input signal (can be either a column or frame per column) |
||||
|
% y: IMDCT of x |
||||
|
% |
||||
|
% Vectorize ! ! ! |
||||
|
|
||||
|
% ------- imdct4.m ----------------------------------------- |
||||
|
% Marios Athineos, marios@ee.columbia.edu |
||||
|
% http://www.ee.columbia.edu/~marios/ |
||||
|
% Copyright (c) 2002 by Columbia University. |
||||
|
% All rights reserved. |
||||
|
% ---------------------------------------------------------- |
||||
|
|
||||
|
[flen,fnum] = size(x); |
||||
|
% Make column if it's a single row |
||||
|
if (flen==1) |
||||
|
x = x(:); |
||||
|
flen = fnum; |
||||
|
fnum = 1; |
||||
|
end |
||||
|
|
||||
|
% We need these for furmulas below |
||||
|
N = flen; |
||||
|
M = N/2; |
||||
|
twoN = 2*N; |
||||
|
sqrtN = sqrt(twoN); |
||||
|
|
||||
|
% We need this twice so keep it around |
||||
|
t = (0:(M-1)).'; |
||||
|
w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN))); |
||||
|
|
||||
|
% Pre-twiddle |
||||
|
t = (0:(M-1)).'; |
||||
|
c = x(2*t+1,:) + j*x(N-1-2*t+1,:); |
||||
|
c = (0.5*w)*c; |
||||
|
|
||||
|
% FFT for N/2 points only !!! |
||||
|
c = fft(c,M); |
||||
|
|
||||
|
% Post-twiddle |
||||
|
c = ((8/sqrtN)*w)*c; |
||||
|
|
||||
|
% Preallocate rotation matrix |
||||
|
rot = zeros(twoN,fnum); |
||||
|
|
||||
|
% Sort |
||||
|
t = (0:(M-1)).'; |
||||
|
rot(2*t+1,:) = real(c(t+1,:)); |
||||
|
rot(N+2*t+1,:) = imag(c(t+1,:)); |
||||
|
t = (1:2:(twoN-1)).'; |
||||
|
rot(t+1,:) = -rot(twoN-1-t+1,:); |
||||
|
|
||||
|
% Shift |
||||
|
t = (0:(3*M-1)).'; |
||||
|
y(t+1,:) = rot(t+M+1,:); |
||||
|
t = (3*M:(twoN-1)).'; |
||||
|
y(t+1,:) = -rot(t-3*M+1,:); |
@ -0,0 +1,75 @@ |
|||||
|
|
||||
|
function y = mdct4(x) |
||||
|
% MDCT4 Calculates the Modified Discrete Cosine Transform |
||||
|
% y = mdct4(x) |
||||
|
% |
||||
|
% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with |
||||
|
% 50% overlap for perfect TDAC reconstruction. |
||||
|
% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full |
||||
|
% matrix (N) of coefs is: yf = [y;-flipud(y)]; |
||||
|
% |
||||
|
% x: input signal (can be either a column or frame per column) |
||||
|
% length of x must be a integer multiple of 4 (each frame) |
||||
|
% y: MDCT of x (coefs are divided by sqrt(N)) |
||||
|
% |
||||
|
% Vectorize ! ! ! |
||||
|
|
||||
|
% ------- mdct4.m ------------------------------------------ |
||||
|
% Marios Athineos, marios@ee.columbia.edu |
||||
|
% http://www.ee.columbia.edu/~marios/ |
||||
|
% Copyright (c) 2002 by Columbia University. |
||||
|
% All rights reserved. |
||||
|
% ---------------------------------------------------------- |
||||
|
|
||||
|
[flen,fnum] = size(x); |
||||
|
% Make column if it's a single row |
||||
|
if (flen==1) |
||||
|
x = x(:); |
||||
|
flen = fnum; |
||||
|
fnum = 1; |
||||
|
end |
||||
|
% Make sure length is multiple of 4 |
||||
|
if (rem(flen,4)~=0) |
||||
|
error('MDCT4 defined for lengths multiple of four.'); |
||||
|
end |
||||
|
|
||||
|
% We need these for furmulas below |
||||
|
N = flen; % Length of window |
||||
|
M = N/2; % Number of coefficients |
||||
|
N4 = N/4; % Simplify the way eqs look |
||||
|
sqrtN = sqrt(N); |
||||
|
|
||||
|
% Preallocate rotation matrix |
||||
|
% It would be nice to be able to do it in-place but we cannot |
||||
|
% cause of the prerotation. |
||||
|
rot = zeros(flen,fnum); |
||||
|
|
||||
|
% Shift |
||||
|
t = (0:(N4-1)).'; |
||||
|
rot(t+1,:) = -x(t+3*N4+1,:); |
||||
|
t = (N4:(N-1)).'; |
||||
|
rot(t+1,:) = x(t-N4+1,:); |
||||
|
clear x; |
||||
|
|
||||
|
% We need this twice so keep it around |
||||
|
t = (0:(N4-1)).'; |
||||
|
w = diag(sparse(exp(-j*2*pi*(t+1/8)/N))); |
||||
|
|
||||
|
% Pre-twiddle |
||||
|
t = (0:(N4-1)).'; |
||||
|
c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))... |
||||
|
-j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:)); |
||||
|
% This is a really cool Matlab trick ;) |
||||
|
c = 0.5*w*c; |
||||
|
clear rot; |
||||
|
|
||||
|
% FFT for N/4 points only !!! |
||||
|
c = fft(c,N4); |
||||
|
|
||||
|
% Post-twiddle |
||||
|
c = (2/sqrtN)*w*c; |
||||
|
|
||||
|
% Sort |
||||
|
t = (0:(N4-1)).'; |
||||
|
y(2*t+1,:) = real(c(t+1,:)); |
||||
|
y(M-1-2*t+1,:) = -imag(c(t+1,:)); |
Loading…
Reference in new issue