Apostolos Fanakis
6 years ago
20 changed files with 1218 additions and 0 deletions
@ -0,0 +1,123 @@ |
|||
function AACSeq3 = AACoder3(fNameIn, fnameAACoded) |
|||
%Implementation of AAC encoder |
|||
% Usage AACSeq3 = AACoder3(fNameIn, fnameAACoded), where: |
|||
% Inputs |
|||
% - fNameIn is the filename and path of the file to encode |
|||
% - frameAACoded is the filename and path of the mat file that will |
|||
% be written after encoding |
|||
% |
|||
% Output |
|||
% - AACSeq3 is an array of structs containing K structs, where K is |
|||
% the number of computed frames. Every struct of the array consists |
|||
% of: |
|||
% * a frameType, |
|||
% * a winType, |
|||
% * chl.TNScoeffs which are the quantized TNS coefficients of |
|||
% this frame's left channel, |
|||
% * chr.TNScoeffs which are the quantized TNS coefficients of |
|||
% this frame's right channel, |
|||
% * chl.T which are the psychoacoustic thresholds of this frame's |
|||
% left channel, |
|||
% * chr.T which are the psychoacoustic thresholds of this frame's |
|||
% right channel, |
|||
% * chl.G which are the quantized global gains of this frame's |
|||
% left channel, |
|||
% * chr.G which are the quantized global gains of this frame's |
|||
% right channel, |
|||
% * chl.sfc which is the Huffman encoded sfc sequence of this |
|||
% frame's left channel, |
|||
% * chr.sfc which is the Huffman encoded sfc sequence of this |
|||
% frame's right channel, |
|||
% * chl.stream which is the Huffman encoded quantized MDCT |
|||
% sequence of this frame's left channel, |
|||
% * chr.stream which is the Huffman encoded quantized MDCT |
|||
% sequence of this frame's right channel, |
|||
% * chl.codebook which is the Huffman codebook used for this |
|||
% frame's left channel |
|||
% * chr.codebook which is the Huffman codebook used for this |
|||
% frame's right channel |
|||
|
|||
% Declares constant window type |
|||
WINDOW_TYPE = 'SIN'; |
|||
|
|||
% Reads the audio file |
|||
[originalAudioData, ~] = audioread(fNameIn); |
|||
|
|||
% Splits the audio in frames and determines the type of each frame |
|||
frameTypes{fix((length(originalAudioData) - 1025) / 1024)} = 'OLS'; |
|||
frameTypes{1} = 'OLS'; |
|||
for i = 1:length(frameTypes) - 2 |
|||
nextFrameStart = (i + 1) * 1024 + 1; |
|||
nextFrameStop = nextFrameStart + 2047; |
|||
frameTypes{i+1} = SSC(1, originalAudioData(nextFrameStart:nextFrameStop, :), frameTypes{i}); |
|||
end |
|||
|
|||
% Assigns a type to the last frame |
|||
if strcmp(frameTypes{length(frameTypes) - 1}, 'LSS') |
|||
frameTypes{length(frameTypes)} = 'ESH'; |
|||
elseif strcmp(frameTypes{length(frameTypes) - 1}, 'ESH') |
|||
frameTypes{length(frameTypes)} = 'ESH'; |
|||
else |
|||
frameTypes{length(frameTypes)} = 'OLS'; |
|||
end |
|||
|
|||
% Encodes audio file |
|||
huffLUT = loadLUT(); |
|||
AACSeq3(length(frameTypes)) = struct; |
|||
for i = 0:length(frameTypes) - 1 |
|||
currFrameStart = i * 1024 + 1; |
|||
currFrameStop = currFrameStart + 2047; |
|||
frameF = filterbank(originalAudioData(currFrameStart:currFrameStop, :), frameTypes{i+1}, WINDOW_TYPE); |
|||
[frameF(:, 1), TNScoeffsL] = TNS(frameF(:, 1), frameTypes{i+1}); |
|||
[frameF(:, 2), TNScoeffsR] = TNS(frameF(:, 2), frameTypes{i+1}); |
|||
|
|||
if i < 2 |
|||
% TODO: what happens on the first two frames? |
|||
else |
|||
prev1FrameStart = (i - 1) * 1024 + 1; |
|||
prev1FrameStop = prev1FrameStart + 2047; |
|||
prev2FrameStart = (i - 2) * 1024 + 1; |
|||
prev2FrameStop = prev1FrameStart + 2047; |
|||
SMR = psycho(... |
|||
originalAudioData(currFrameStart:currFrameStop, :), ... |
|||
frameTypes{i+1}, ... |
|||
originalAudioData(prev1FrameStart:prev1FrameStop, :), ... |
|||
originalAudioData(prev2FrameStart:prev2FrameStop, :)); |
|||
[SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMR); |
|||
[SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMR); |
|||
end |
|||
|
|||
[streamL, huffcodebookL] = encodeHuff(SL, huffLUT); |
|||
[streamR, huffcodebookR] = encodeHuff(SR, huffLUT); |
|||
[sfcL, ~] = encodeHuff(sfcL, huffLUT, 12); |
|||
[sfcR, ~] = encodeHuff(sfcR, huffLUT, 12); |
|||
|
|||
AACSeq3(i + 1).frameType = frameTypes(i + 1); |
|||
AACSeq3(i + 1).winType = WINDOW_TYPE; |
|||
AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL; |
|||
AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR; |
|||
AACSeq3(i + 1).chl.T = what; % TODO: find dis shit |
|||
AACSeq3(i + 1).chr.T = what; |
|||
AACSeq3(i + 1).chl.G = GL; |
|||
AACSeq3(i + 1).chr.G = GR; |
|||
AACSeq3(i + 1).chl.sfc = sfcL; |
|||
AACSeq3(i + 1).chr.sfc = sfcR; |
|||
AACSeq3(i + 1).chl.stream = streamL; |
|||
AACSeq3(i + 1).chr.stream = streamR; |
|||
AACSeq3(i + 1).chl.codebook = huffcodebookL; |
|||
AACSeq3(i + 1).chr.codebook = huffcodebookR; |
|||
end |
|||
|
|||
save(fnameAACoded,AACSeq3); |
|||
|
|||
if false |
|||
[idx,label] = grp2idx(sort(frameTypes)); |
|||
hist(idx,unique(idx)); |
|||
set(gca,'xTickLabel',label) |
|||
|
|||
sum(idx(:) == 1) |
|||
sum(idx(:) == 2) |
|||
sum(idx(:) == 3) |
|||
sum(idx(:) == 4) |
|||
end |
|||
end |
@ -0,0 +1,26 @@ |
|||
function [S, sfc, G] = AACquantizer(frameF, frameType, SMR) |
|||
%Implementation of Quantizer |
|||
% Usage [S, sfc, G] = AACquantizer(frameF, frameType, SMR), where: |
|||
% Inputs |
|||
% - frameF is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing only one of the audio channels stored in |
|||
% a vector of length 1024 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% - SMR is the signal to mask ratio array of dimensions 42X8 for |
|||
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise |
|||
% |
|||
% Output |
|||
% - S are the MDCT quantization symbols of one audio channel stored |
|||
% in a vector of length 1024 |
|||
% - sfc are the scalefactors per band stored in an array of |
|||
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 |
|||
% otherwise, where NB is the number of bands |
|||
% - G is the global gain stored in an array of dimensions 1X8 for |
|||
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise |
|||
|
|||
|
|||
end |
|||
|
Binary file not shown.
@ -0,0 +1,78 @@ |
|||
function frameType = SSC(~, nextFrameT, prevFrameType) |
|||
%Implementation of the Sequence Segmentation Control step |
|||
% Usage frameType = SSC(frameT, nextFrameT, prevFrameType), where: |
|||
% Inputs |
|||
% - frameT is a frame in the time domain, containing both channels of |
|||
% the audio stored in an array of dimensions 2048X2 |
|||
% - nextFrameT is the next frame in the time domain, containing both |
|||
% channels of the audio stored in an array of dimensions 2048X2 |
|||
% - prevFrameType is the type of the previous frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% |
|||
% Output |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
|
|||
% Examines the cases where the determination of the type of the next |
|||
% frame isn't needed for |
|||
if strcmp(prevFrameType, 'LSS') |
|||
frameType = 'ESH'; |
|||
return; |
|||
elseif strcmp(prevFrameType, 'LPS') |
|||
frameType = 'OLS'; |
|||
return; |
|||
end |
|||
|
|||
% Determines the type of the next frame |
|||
% Filters frame |
|||
nextFrameT = filter([0.7548 -0.7548], [1 -0.5095], nextFrameT, [], 1); |
|||
channelFrameType = {'nan', 'nan'}; |
|||
for channel = 1:2 |
|||
% Calculates sub-frame energy estimation |
|||
[subFrames, ~] = buffer(nextFrameT(577:end - 448, channel), 128, 0, 'nodelay'); |
|||
energyEstimations = sum(subFrames .^ 2, 1); |
|||
|
|||
% Calculates the ratio of the sub-frame energy to the average energy of |
|||
% the previous sub-frames |
|||
energyRatios = movmean(energyEstimations, [8 0]); |
|||
energyRatios = energyEstimations ./ [energyEstimations(1) energyRatios(1:end-1)]; |
|||
|
|||
if ~isempty(find(energyEstimations > 10^(-3), 1)) && ... |
|||
~isempty(find(energyRatios(energyEstimations > 10^(-3)) > 10, 1)) |
|||
% Next frame is ESH |
|||
if strcmp(prevFrameType, 'ESH') |
|||
% This frame of this channel is an EIGHT_SHORT_SEQUENCE type |
|||
% frame. This means the frames of both channels will be encoded |
|||
% as EIGHT_SHORT_SEQUENCE type frames. |
|||
frameType = 'ESH'; |
|||
return; |
|||
elseif strcmp(prevFrameType, 'OLS') |
|||
channelFrameType{channel} = 'LSS'; |
|||
end |
|||
else |
|||
if strcmp(prevFrameType, 'ESH') |
|||
channelFrameType{channel} = 'LPS'; |
|||
elseif strcmp(prevFrameType, 'OLS') |
|||
channelFrameType{channel} = 'OLS'; |
|||
end |
|||
end |
|||
end |
|||
|
|||
% Joins decision for both channels |
|||
if strcmp(channelFrameType{1}, 'nan') || strcmp(channelFrameType{2}, 'nan') |
|||
error('SSC, l[73]: Internal error occurred!') |
|||
end |
|||
if strcmp(channelFrameType{1}, 'OLS') |
|||
frameType = channelFrameType{2}; |
|||
elseif strcmp(channelFrameType{2}, 'OLS') |
|||
frameType = channelFrameType{1}; |
|||
elseif strcmp(channelFrameType{1}, channelFrameType{2}) |
|||
frameType = channelFrameType{1}; |
|||
else |
|||
frameType = 'ESH'; |
|||
end |
|||
end |
@ -0,0 +1,119 @@ |
|||
function [frameFout, TNScoeffs] = TNS(frameFin, frameType) |
|||
%Implementation of the TNS step |
|||
% Usage [frameFout, TNScoeffs] = TNS(frameFin, frameType), where: |
|||
% Inputs |
|||
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing both channels of the audio stored in an |
|||
% array of dimensions 1024X2 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% |
|||
% Output |
|||
% - frameFout is the frame in the frequency domain after Temporal Noise |
|||
% Shaping, in MDCT coefficients representation containing both channels |
|||
% of the audio stored in an array of dimensions 1024X2 |
|||
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
|||
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
|||
|
|||
% Declares constant numbers of bands for long and short windows |
|||
LONG_WINDOW_NUMBER_OF_BANDS = 69; |
|||
SHORT_WINDOW_NUMBER_OF_BANDS = 42; |
|||
% Declares constant order of the linear prediction filter |
|||
LPF_ORDER = 4; |
|||
% Declares constant coefficients' resolution, in the form of number of |
|||
% decimal digits used |
|||
COEF_RES = 1; |
|||
|
|||
% Declares persistent variable holding the TNS tables and initializes if empty |
|||
persistent TNSTables; |
|||
if isempty(TNSTables) |
|||
TNSTables = load('TableB219.mat'); |
|||
end |
|||
|
|||
if ~strcmp(frameType, 'ESH') |
|||
% Calculates the energy per band |
|||
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0; |
|||
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS - 1 |
|||
bandEnergy(band) = sumsqr(frameFin(TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band + 1, 2))); |
|||
end |
|||
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = sumsqr(frameFin( ... |
|||
TNSTables.B219a(LONG_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); |
|||
|
|||
% Calculates the normalization factors |
|||
bandIndices = quantiz(0:1023, TNSTables.B219a(:, 2) - 1); |
|||
normalizationFactor = sqrt(bandEnergy(bandIndices)); |
|||
|
|||
% Smooths normalization factors |
|||
for normIndex = 1023:-1:1 |
|||
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
|||
end |
|||
for normIndex = 2:1024 |
|||
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
|||
end |
|||
|
|||
% Normalizes MDCT coefficients according to each band energy |
|||
normalizedFrameFin = frameFin ./ normalizationFactor'; |
|||
|
|||
% Calculates the linear prediction coefficients |
|||
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
|||
|
|||
% Quantizes these coefficients |
|||
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); |
|||
|
|||
% Filters MDCT coefficients |
|||
if ~isstable(1, quantizedLinPredCoeff) |
|||
error('TNS, l[79]: Inverse filter not stable!'); |
|||
else |
|||
TNScoeffs = quantizedLinPredCoeff(2:end); |
|||
frameFout = filter(quantizedLinPredCoeff, 1, frameFin); |
|||
end |
|||
else |
|||
% Initializes output vectors |
|||
TNScoeffs(LPF_ORDER * 8) = 0; |
|||
frameFout(1024) = 0; |
|||
|
|||
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = 0; |
|||
for subFrameIndex = 1:8 |
|||
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
|||
|
|||
% Calculates the energy per band |
|||
for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS - 1 |
|||
bandEnergy(band) = sumsqr(subFrame(TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band + 1, 2))); |
|||
end |
|||
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS) = sumsqr(subFrame( ... |
|||
TNSTables.B219b(SHORT_WINDOW_NUMBER_OF_BANDS, 2) + 1:end)); |
|||
|
|||
% Calculates the normalization factors |
|||
bandIndices = quantiz(0:127, TNSTables.B219b(:, 2) - 1); |
|||
normalizationFactor = sqrt(bandEnergy(bandIndices)); |
|||
|
|||
% Smooths normalization factors |
|||
for normIndex = 127:-1:1 |
|||
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex + 1)) / 2; |
|||
end |
|||
for normIndex = 2:128 |
|||
normalizationFactor(normIndex) = (normalizationFactor(normIndex) + normalizationFactor(normIndex - 1)) / 2; |
|||
end |
|||
|
|||
% Normalizes MDCT coefficients according to each band energy |
|||
normalizedFrameFin = subFrame ./ normalizationFactor'; |
|||
|
|||
% Calculates the linear prediction coefficients |
|||
[linPredCoeff, ~] = lpc(normalizedFrameFin, LPF_ORDER); |
|||
|
|||
% Quantizes these coefficients |
|||
quantizedLinPredCoeff = round(linPredCoeff, COEF_RES); |
|||
|
|||
% Filters MDCT coefficients |
|||
if ~isstable(1, quantizedLinPredCoeff) |
|||
error('TNS, l[79]: Inverse filter not stable!'); |
|||
else |
|||
TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4) = quantizedLinPredCoeff(2:end); |
|||
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
|||
filter(quantizedLinPredCoeff, 1, subFrame); |
|||
end |
|||
end |
|||
end |
|||
end |
Binary file not shown.
@ -0,0 +1,82 @@ |
|||
function decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) |
|||
% DECODEHUFF Huffman decoder stage |
|||
% |
|||
% decCoeffs = decodeHuff(huffSec, huffCodebook, huffLUT) performs huffman |
|||
% decoding, where huffSec is a string of '1' and '0' corresponding to the |
|||
% Huffman encoded stream, and huffCodebook is the index (0 to 12) of the |
|||
% codebook used, as outputted by encodeHuff. huffLUT is the Huffman |
|||
% look-up tables to be loaded using loadLUT.m. The output decCoeffs is |
|||
% the decoded quantised (integer) values. |
|||
% |
|||
% CAUTION: due to zero padding the length of decCoeffs may be larger than |
|||
% the length of the encoded sequence. Simply ignore values (they should |
|||
% be equal to zero) that are outside the index range |
|||
% [1:length(encoded sequence)]. |
|||
|
|||
huffLUTi = huffLUT{huffCodebook}; |
|||
h=huffLUTi.invTable; |
|||
huffCodebook=huffLUTi.codebook; |
|||
nTupleSize=huffLUTi.nTupleSize; |
|||
maxAbsCodeVal=huffLUTi.maxAbsCodeVal; |
|||
signedValues=huffLUTi.signedValues; |
|||
eos=0;%end of stream |
|||
decCoeffs=[]; |
|||
|
|||
huffSec=huffSec-48; |
|||
streamIndex=1; |
|||
while(~eos)%end of stream |
|||
huffFailure=0; |
|||
wordbit=0; |
|||
r=1;%row indicator |
|||
b=huffSec(streamIndex+wordbit); |
|||
|
|||
%decoding tuple according to inverse matrix |
|||
while(1) |
|||
b=huffSec(streamIndex+wordbit); |
|||
wordbit=wordbit+1; |
|||
rOld=r; |
|||
r=h(rOld,b+1);%new index |
|||
if((h(r,1)==0)&(h(r,2)==0))%reached a leaf (found a valid word) |
|||
symbolIndex=h(r,3)-1;%The actual index begins from zero. |
|||
streamIndex=streamIndex+wordbit; |
|||
break; |
|||
end |
|||
end |
|||
%decoding nTuple magnitudes |
|||
if(signedValues) |
|||
base=2*maxAbsCodeVal+1; |
|||
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); |
|||
nTupleDec=nTupleDec-maxAbsCodeVal; |
|||
else |
|||
base=maxAbsCodeVal+1; |
|||
nTupleDec=rem(floor(symbolIndex*base.^([-nTupleSize+1:1:0])),base); |
|||
nTupleSignBits=huffSec(streamIndex:streamIndex+nTupleSize-1); |
|||
nTupleSign=-sign(nTupleSignBits(:)'-0.5); |
|||
streamIndex=streamIndex+nTupleSize; |
|||
nTupleDec=nTupleDec.*nTupleSign; |
|||
end |
|||
escIndex=find(abs(nTupleDec)==16); |
|||
if((huffCodebook==11)&&(sum(escIndex))) |
|||
for i=1:length(escIndex) |
|||
b=huffSec(streamIndex); |
|||
N=0; |
|||
%reading the escape seq, counting N '1's |
|||
while(b) |
|||
N=N+1; |
|||
b=huffSec(streamIndex+N); |
|||
end%last bit read was a zero corresponding to esc_separator.. |
|||
streamIndex=streamIndex+N; |
|||
%reading the next N+4 bits |
|||
N4=N+4; |
|||
escape_word=huffSec(streamIndex+1:streamIndex+N4); |
|||
nTupleDec(escIndex(i))=2^N4+bin2dec(num2str(escape_word)); |
|||
streamIndex=streamIndex+N4+1; |
|||
end |
|||
nTupleDec(escIndex)=nTupleDec(escIndex).*nTupleSign(escIndex); |
|||
end |
|||
decCoeffs=[decCoeffs nTupleDec]; |
|||
if (streamIndex>length(huffSec)) |
|||
eos=1; |
|||
end |
|||
|
|||
end |
@ -0,0 +1,53 @@ |
|||
function [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded) |
|||
%Function that demonstrates usage of the level 3 code |
|||
% Usage [SNR, bitrate, compression] = demoAAC3(fNameIn, fNameOut, frameAACoded), where: |
|||
% Inputs |
|||
% - fNameIn is the filename and path of the file to encode |
|||
% - fNameOut is the filename and path of the wav file that will be |
|||
% written after decoding |
|||
% - frameAACoded is the filename and path of the mat file that will |
|||
% be written after encoding |
|||
% |
|||
% Output |
|||
% - SNR is the signal to noise ration computed after successively |
|||
% encoding and decoding the audio signal |
|||
% - bitrate is the bits per second |
|||
% - compression is the ratio of the bitrate before the encoding to |
|||
% the bitrate after it |
|||
|
|||
AACSeq3 = AACoder3(fNameIn, frameAACoded); |
|||
decodedAudio = iAACoder3(AACSeq3, fNameOut); |
|||
|
|||
[audioData, ~] = audioread(fNameIn); |
|||
|
|||
% figure() |
|||
% plot(audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) |
|||
% for frame = 1:length(AACSeq1) |
|||
% if strcmp(AACSeq1(frame).frameType, 'LSS') || strcmp(AACSeq1(frame).frameType, 'LPS') |
|||
% % Add lines |
|||
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); |
|||
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); |
|||
% % Set properties of lines |
|||
% set([h1 h2],'Color','y','LineWidth',2) |
|||
% % Add a patch |
|||
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'y') |
|||
% elseif strcmp(AACSeq1(frame).frameType, 'ESH') |
|||
% % Add lines |
|||
% h1 = line([(frame-1)*1024+1 (frame-1)*1024+1],[-2*10^(-15) 2*10^(-15)]); |
|||
% h2 = line([frame*1024+1 frame*1024+1],[-2*10^(-15) 2*10^(-15)]); |
|||
% % Set properties of lines |
|||
% set([h1 h2],'Color','r','LineWidth',2) |
|||
% % Add a patch |
|||
% patch([(frame-1)*1024+1 frame*1024+1 frame*1024+1 (frame-1)*1024+1],[-2*10^(-15) -2*10^(-15) 2*10^(-15) 2*10^(-15)],'r') |
|||
% end |
|||
% end |
|||
% set(gca,'children',flipud(get(gca,'children'))) |
|||
% |
|||
% figure() |
|||
% plot(audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) |
|||
|
|||
snr(audioData(1025:length(decodedAudio)-1024, 1), audioData(1025:length(decodedAudio)-1024, 1) - decodedAudio(1025:end-1024, 1)) |
|||
snr(audioData(1025:length(decodedAudio)-1024, 2), audioData(1025:length(decodedAudio)-1024, 2) - decodedAudio(1025:end-1024, 2)) |
|||
SNR = 10*log10((sum(audioData(1025:length(decodedAudio)-1024, :)) .^ 2) ./ ... |
|||
(sum(audioData(1025:length(decodedAudio)-1024, :) - decodedAudio(1025:end-1024, :)) .^ 2)); |
|||
end |
@ -0,0 +1,194 @@ |
|||
function [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) |
|||
% ENCODEHUFF Huffman encoder stage |
|||
% |
|||
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT) performs |
|||
% huffman coding for quantised (integer) values of a section coeffSec; |
|||
% huffLUT are the Huffman look-up tables to be loaded using loadLUT.m |
|||
% |
|||
% It returns huffSec, a string of '1' and '0' corresponding to the |
|||
% Huffman encoded stream, and huffCodebook, the number of the Huffman |
|||
% codebook used (see page 147 in w2203tfa.pdf and pages 82-94 in |
|||
% w2203tft.pdf). |
|||
% |
|||
% [huffSec, huffCodebook] = encodeHuff(coeffSec, huffLUT, forcedCodebook) |
|||
% forces the codebook forcedCodebook to be used. |
|||
|
|||
if nargin == 2 |
|||
[huffSec, huffCodebook] = encodeHuff1(coeffSec, huffLUT); |
|||
else |
|||
huffSec = huffLUTCode1(huffLUT{forcedCodebook}, coeffSec); |
|||
huffCodebook = forcedCodebook; |
|||
end |
|||
|
|||
function [huffSec, huffCodebook]=encodeHuff1(coeffSec,huffLUT) |
|||
maxAbsVal=max(abs(coeffSec)); |
|||
ESC=0;%escape sequence used? |
|||
switch(maxAbsVal) |
|||
case 0 |
|||
huffCodebook=0; |
|||
[huffSec]=huffLUTCode0(); |
|||
case 1 |
|||
huffCodebook=[1 2]; |
|||
% signedValues=1; |
|||
% nTupleSize=4; |
|||
% maxAbsCodeVal=1; |
|||
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
|||
if(length(huffSec1)<=length(huffSec2)) |
|||
huffSec=huffSec1; |
|||
huffCodebook=huffCodebook(1); |
|||
else |
|||
huffSec=huffSec2; |
|||
huffCodebook=huffCodebook(2); |
|||
end |
|||
case 2 |
|||
huffCodebook=[3 4]; |
|||
% signedValues=0; |
|||
% nTupleSize=4; |
|||
% maxAbsCodeVal=2; |
|||
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
|||
if(length(huffSec1)<=length(huffSec2)) |
|||
huffSec=huffSec1; |
|||
huffCodebook=huffCodebook(1); |
|||
else |
|||
huffSec=huffSec2; |
|||
huffCodebook=huffCodebook(2); |
|||
end |
|||
case {3,4} |
|||
huffCodebook=[5 6]; |
|||
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
|||
if(length(huffSec1)<=length(huffSec2)) |
|||
huffSec=huffSec1; |
|||
huffCodebook=huffCodebook(1); |
|||
else |
|||
huffSec=huffSec2; |
|||
huffCodebook=huffCodebook(2); |
|||
end |
|||
case {5,6,7} |
|||
huffCodebook=[7 8]; |
|||
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
|||
if(length(huffSec1)<=length(huffSec2)) |
|||
huffSec=huffSec1; |
|||
huffCodebook=huffCodebook(1); |
|||
else |
|||
huffSec=huffSec2; |
|||
huffCodebook=huffCodebook(2); |
|||
end |
|||
case {8,9,10,11,12} |
|||
huffCodebook=[9 10]; |
|||
[huffSec1]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
[huffSec2]=huffLUTCode1(huffLUT{huffCodebook(2)}, coeffSec); |
|||
if(length(huffSec1)<=length(huffSec2)) |
|||
huffSec=huffSec1; |
|||
huffCodebook=huffCodebook(1); |
|||
else |
|||
huffSec=huffSec2; |
|||
huffCodebook=huffCodebook(2); |
|||
end |
|||
case {13,14,15} |
|||
huffCodebook=11; |
|||
[huffSec]=huffLUTCode1(huffLUT{huffCodebook(1)}, coeffSec); |
|||
otherwise |
|||
huffCodebook=11; |
|||
[huffSec]=huffLUTCodeESC(huffLUT{huffCodebook(1)}, coeffSec); |
|||
end |
|||
|
|||
|
|||
|
|||
function [huffSec]=huffLUTCode1(huffLUT, coeffSec) |
|||
LUT=huffLUT.LUT; |
|||
huffCodebook=huffLUT.codebook; |
|||
nTupleSize=huffLUT.nTupleSize; |
|||
maxAbsCodeVal=huffLUT.maxAbsCodeVal; |
|||
signedValues=huffLUT.signedValues; |
|||
numTuples=ceil(length(coeffSec)/nTupleSize); |
|||
if(signedValues) |
|||
coeffSec=coeffSec+maxAbsCodeVal; |
|||
base=2*maxAbsCodeVal+1; |
|||
else |
|||
base=maxAbsCodeVal+1; |
|||
end |
|||
coeffSecPad=zeros(1,numTuples*nTupleSize); |
|||
coeffSecPad(1:length(coeffSec))=coeffSec; |
|||
for i=1:numTuples |
|||
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); |
|||
huffIndex=abs(nTuple)*(base.^[nTupleSize-1:-1:0])'; |
|||
hexHuff=LUT(huffIndex+1,3); |
|||
|
|||
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex |
|||
huffSecLen=LUT(huffIndex+1,2); |
|||
if(signedValues) |
|||
huffSec{i}=dec2bin(hex2dec(hexHuff),huffSecLen); |
|||
else |
|||
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))')];%appending sign |
|||
end |
|||
end |
|||
huffSec=strcat([huffSec{:}]); |
|||
|
|||
function [huffSec]=huffLUTCode0() |
|||
huffSec=''; |
|||
|
|||
function [huffSec]=huffLUTCodeESC(huffLUT, coeffSec) |
|||
LUT=huffLUT.LUT; |
|||
huffCodebook=huffLUT.codebook; |
|||
nTupleSize=huffLUT.nTupleSize; |
|||
maxAbsCodeVal=huffLUT.maxAbsCodeVal; |
|||
signedValues=huffLUT.signedValues; |
|||
|
|||
numTuples=ceil(length(coeffSec)/nTupleSize); |
|||
base=maxAbsCodeVal+1; |
|||
coeffSecPad=zeros(1,numTuples*nTupleSize); |
|||
coeffSecPad(1:length(coeffSec))=coeffSec; |
|||
|
|||
nTupleOffset=zeros(1,nTupleSize); |
|||
for i=1:numTuples |
|||
nTuple=coeffSecPad((i-1)*nTupleSize+1:i*nTupleSize); |
|||
lnTuple=nTuple; |
|||
lnTuple(lnTuple==0)=eps; |
|||
N4=max([0 0; floor(log2(abs(lnTuple)))]); |
|||
N=max([0 0 ; N4-4]); |
|||
esc=abs(nTuple)>15; |
|||
|
|||
nTupleESC=nTuple; |
|||
nTupleESC(esc)=sign(nTupleESC(esc))*16;%Just keep the sixteens here (nTupleESC). nTuple contains the actual values |
|||
|
|||
huffIndex=abs(nTupleESC)*(base.^[nTupleSize-1:-1:0])'; |
|||
hexHuff=LUT(huffIndex+1,3); |
|||
hexHuff=dec2hex(hexHuff);%Dec values were saved. Converting to hex |
|||
huffSecLen=LUT(huffIndex+1,2); |
|||
|
|||
%Adding sufficient ones to the prefix. If N<=0 empty string created |
|||
escape_prefix1=''; |
|||
escape_prefix2=''; |
|||
escape_prefix1(1:N(1))='1'; |
|||
escape_prefix2(1:N(2))='1'; |
|||
|
|||
|
|||
%Calculating the escape words. Taking absolute values. Will add the |
|||
%sign later on |
|||
if esc(1) |
|||
escape_separator1='0'; |
|||
escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); |
|||
else |
|||
escape_separator1=''; |
|||
escape_word1=''; |
|||
end |
|||
if esc(2) |
|||
escape_separator2='0'; |
|||
escape_word2=dec2bin(abs(nTuple(2))-2^(N4(2)),N4(2)); |
|||
else |
|||
escape_separator2=''; |
|||
escape_word2=''; |
|||
end |
|||
|
|||
% escape_word1=dec2bin(abs(nTuple(1))-2^(N4(1)),N4(1)); |
|||
|
|||
escSeq=[escape_prefix1, escape_separator1, escape_word1, escape_prefix2, escape_separator2, escape_word2]; |
|||
|
|||
%adding the sign bits and the escape sequence |
|||
huffSec{i}=[dec2bin(hex2dec(hexHuff),huffSecLen),strcat((num2str(nTuple'<0))'), escSeq];%appending sign |
|||
end |
|||
huffSec=strcat([huffSec{:}]); |
@ -0,0 +1,100 @@ |
|||
function frameF = filterbank(frameT, frameType, winType) |
|||
%Implementation of the Filter Bank step |
|||
% Usage frameF = filterbank(frameT, frameType, winType), where: |
|||
% Inputs |
|||
% - frameT is a frame in the time domain, containing both channels of |
|||
% the audio stored in an array of dimensions 2048X2 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% - winType is the type of the window selected, can be one of "KBD", |
|||
% "SIN" |
|||
% |
|||
% Output |
|||
% - frameF is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing both channels of the audio stored in an |
|||
% array of dimensions 1024X2 |
|||
|
|||
% Declares persistent windows variables and initializes if empty |
|||
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
|||
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
|||
isempty(sinWindowLong) || isempty(sinWindowShort) |
|||
kaiserLong = kaiser(1025, 6*pi); |
|||
kaiserSumLong = sum(kaiserLong); |
|||
kaiserShort = kaiser(129, 4*pi); |
|||
kaiserSumShort = sum(kaiserShort); |
|||
|
|||
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); |
|||
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); |
|||
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); |
|||
|
|||
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); |
|||
|
|||
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); |
|||
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); |
|||
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); |
|||
|
|||
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); |
|||
end |
|||
|
|||
% Initializes output array |
|||
frameF(1024, 2) = 0; |
|||
|
|||
% Applies appropriate window to the frame |
|||
if strcmp(frameType, 'OLS') |
|||
if strcmp(winType, 'KBD') |
|||
frameT = bsxfun(@times, frameT, kaiserWindowLong'); |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT = bsxfun(@times, frameT, sinWindowLong'); |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
|
|||
frameF = mdct4(frameT); |
|||
elseif strcmp(frameType, 'LSS') |
|||
if strcmp(winType, 'KBD') |
|||
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); |
|||
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); |
|||
frameT(1601:end, :) = 0; |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); |
|||
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); |
|||
frameT(1601:end, :) = 0; |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
|
|||
frameF = mdct4(frameT); |
|||
elseif strcmp(frameType, 'LPS') |
|||
if strcmp(winType, 'KBD') |
|||
frameT(1:448, :) = 0; |
|||
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); |
|||
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT(1:448, :) = 0; |
|||
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); |
|||
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
|
|||
frameF = mdct4(frameT); |
|||
elseif strcmp(frameType, 'ESH') |
|||
for channel = 1:2 |
|||
% Splits the frame into sub-frames |
|||
[subFrames, ~] = buffer(frameT(449:end-448, channel), 256, 128, 'nodelay'); |
|||
|
|||
if strcmp(winType, 'KBD') |
|||
subFrames = subFrames .* repmat(kaiserWindowShort', [1 8]); |
|||
elseif strcmp(winType, 'SIN') |
|||
subFrames = subFrames .* repmat(sinWindowShort', [1 8]); |
|||
end |
|||
|
|||
for subFrameIndex = 1:8 |
|||
frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel) = ... |
|||
mdct4(subFrames(:, subFrameIndex)); |
|||
end |
|||
end |
|||
end |
|||
end |
Binary file not shown.
Binary file not shown.
@ -0,0 +1,73 @@ |
|||
function x = iAACoder3(AACSeq3, fNameOut) |
|||
%Implementation of AAC decoder |
|||
% Usage x = iAACoder3(AACSeq3, fNameOut), where: |
|||
% Inputs |
|||
% - fNameOut is the filename and path of the file that will be |
|||
% written after decoding |
|||
% - AACSeq3 is an array of structs containing K structs, where K is |
|||
% the number of computed frames. Every struct of the array consists |
|||
% of: |
|||
% * a frameType, |
|||
% * a winType, |
|||
% * chl.TNScoeffs which are the quantized TNS coefficients of |
|||
% this frame's left channel, |
|||
% * chr.TNScoeffs which are the quantized TNS coefficients of |
|||
% this frame's right channel, |
|||
% * chl.T which are the psychoacoustic thresholds of this frame's |
|||
% left channel, |
|||
% * chr.T which are the psychoacoustic thresholds of this frame's |
|||
% right channel, |
|||
% * chl.G which are the quantized global gains of this frame's |
|||
% left channel, |
|||
% * chr.G which are the quantized global gains of this frame's |
|||
% right channel, |
|||
% * chl.sfc which is the Huffman encoded sfc sequence of this |
|||
% frame's left channel, |
|||
% * chr.sfc which is the Huffman encoded sfc sequence of this |
|||
% frame's right channel, |
|||
% * chl.stream which is the Huffman encoded quantized MDCT |
|||
% sequence of this frame's left channel, |
|||
% * chr.stream which is the Huffman encoded quantized MDCT |
|||
% sequence of this frame's right channel, |
|||
% * chl.codebook which is the Huffman codebook used for this |
|||
% frame's left channel |
|||
% * chr.codebook which is the Huffman codebook used for this |
|||
% frame's right channel |
|||
% |
|||
% Output |
|||
% - x is an array containing the decoded audio samples |
|||
|
|||
% Initializes an array to hold the decoded samples |
|||
decodedAudio(1024 * (length(AACSeq3) + 1), 2) = 0; |
|||
% Initializes an array to hold both audio channels |
|||
frameF(1024, 2) = 0; |
|||
|
|||
% Decodes audio file |
|||
huffLUT = loadLUT(); |
|||
for i = 0:length(AACSeq3) - 1 |
|||
currFrameStart = i * 1024 + 1; |
|||
currFrameStop = currFrameStart + 2047; |
|||
|
|||
SL = decodeHuff(AACSeq3(i + 1).chl.stream, ... |
|||
AACSeq3(i + 1).chl.codebook, huffLUT); |
|||
SR = decodeHuff(AACSeq3(i + 1).chr.stream, ... |
|||
AACSeq3(i + 1).chr.codebook, huffLUT); |
|||
|
|||
sfcL = decodeHuff(AACSeq3(i + 1).chl.sfc, 12, huffLUT); % TODO: maybe LUT(12); |
|||
sfcR = decodeHuff(AACSeq3(i + 1).chr.sfc, 12, huffLUT); |
|||
|
|||
frameF(:, 1) = iAACquantizer(SL, sfcL, AACSeq3(i + 1).chl.G, AACSeq3(i+1).frameType); |
|||
frameF(:, 2) = iAACquantizer(SR, sfcR, AACSeq3(i + 1).chr.G, AACSeq3(i+1).frameType); |
|||
|
|||
TNScoeffsL = AACSeq3(i + 1).chl.TNScoeffs; |
|||
TNScoeffsR = AACSeq3(i + 1).chr.TNScoeffs; |
|||
frameF(:, 1) = iTNS(frameF(:, 1), AACSeq3(i+1).frameType, TNScoeffsL); |
|||
frameF(:, 2) = iTNS(frameF(:, 2), AACSeq3(i+1).frameType, TNScoeffsR); |
|||
frameT = iFilterbank(frameF, AACSeq3(i+1).frameType, AACSeq3(i+1).winType); |
|||
|
|||
decodedAudio(currFrameStart:currFrameStop, :) = decodedAudio(currFrameStart:currFrameStop, :) + frameT; |
|||
end |
|||
|
|||
audiowrite(fNameOut, decodedAudio, 48000); |
|||
x = decodedAudio; |
|||
end |
@ -0,0 +1,24 @@ |
|||
function frameF = iAACquantizer(S, sfc, G, frameType) |
|||
%Implementation of the inverse Quantizer |
|||
% Usage frameF = iAACquantizer(S, sfc, G, frameType), where: |
|||
% Inputs |
|||
% - S are the MDCT quantization symbols of one audio channel stored |
|||
% in a vector of length 1024 |
|||
% - sfc are the scalefactors per band stored in an array of |
|||
% dimensions NBX8 for EIGHT_SHORT_SEQUENCE frames and NBX1 |
|||
% otherwise, where NB is the number of bands |
|||
% - G is the global gain stored in an array of dimensions 1X8 for |
|||
% EIGHT_SHORT_SEQUENCE frames and a single value otherwise |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% |
|||
% Output |
|||
% - frameF is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing only one of the audio channels stored in |
|||
% a vector of length 1024 |
|||
|
|||
|
|||
end |
|||
|
@ -0,0 +1,99 @@ |
|||
function frameT = iFilterbank(frameF, frameType, winType) |
|||
%Implementation of the Inverse Filter Bank step |
|||
% Usage frameT = iFilterbank(frameF, frameType, winType), where: |
|||
% Inputs |
|||
% - frameF is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing both channels of the audio stored in an |
|||
% array of dimensions 1024X2 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% - winType is the type of the window selected, can be one of "KBD", |
|||
% "SIN" |
|||
% |
|||
% Output |
|||
% - frameT is a frame in the time domain, containing both channels of |
|||
% the audio stored in an array of dimensions 2048X2 |
|||
|
|||
% Declares persistent windows variables and initializes if empty |
|||
persistent kaiserWindowLong kaiserWindowShort sinWindowLong sinWindowShort; |
|||
if isempty(kaiserWindowLong) || isempty(kaiserWindowShort) || ... |
|||
isempty(sinWindowLong) || isempty(sinWindowShort) |
|||
kaiserLong = kaiser(1025, 6*pi); |
|||
kaiserSumLong = sum(kaiserLong); |
|||
kaiserShort = kaiser(129, 4*pi); |
|||
kaiserSumShort = sum(kaiserShort); |
|||
|
|||
kaiserWindowLong(1:1024) = movsum(kaiserLong(1:1024), [1024 0]); |
|||
kaiserWindowLong(1025:2048) = movsum(flipud(kaiserLong(1:1024)), [0 1024]); |
|||
kaiserWindowLong = sqrt(kaiserWindowLong ./ kaiserSumLong); |
|||
|
|||
sinWindowLong = sin(pi * ((0:2047) + 0.5) / 2048); |
|||
|
|||
kaiserWindowShort(1:128) = movsum(kaiserShort(1:128), [128 0]); |
|||
kaiserWindowShort(129:256) = movsum(flipud(kaiserShort(1:128)), [0 128]); |
|||
kaiserWindowShort = sqrt(kaiserWindowShort ./ kaiserSumShort); |
|||
|
|||
sinWindowShort = sin(pi * ((0:255) + 0.5) / 256); |
|||
end |
|||
|
|||
% Initializes output array |
|||
frameT(2048, 2) = 0; |
|||
|
|||
% Applies appropriate window to the frame |
|||
if strcmp(frameType, 'OLS') |
|||
frameT = imdct4(frameF); |
|||
|
|||
if strcmp(winType, 'KBD') |
|||
frameT = bsxfun(@times, frameT, kaiserWindowLong'); |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT = bsxfun(@times, frameT, sinWindowLong'); |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
elseif strcmp(frameType, 'LSS') |
|||
frameT = imdct4(frameF); |
|||
|
|||
if strcmp(winType, 'KBD') |
|||
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), kaiserWindowLong(1:1024)'); |
|||
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), kaiserWindowShort(129:end)'); |
|||
frameT(1601:end, :) = 0; |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT(1:1024, :) = bsxfun(@times, frameT(1:1024, :), sinWindowLong(1:1024)'); |
|||
frameT(1473:1600, :) = bsxfun(@times, frameT(1473:1600, :), sinWindowShort(129:end)'); |
|||
frameT(1601:end, :) = 0; |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
elseif strcmp(frameType, 'LPS') |
|||
frameT = imdct4(frameF); |
|||
|
|||
if strcmp(winType, 'KBD') |
|||
frameT(1:448, :) = 0; |
|||
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), kaiserWindowShort(1:128)'); |
|||
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), kaiserWindowLong(1025:end)'); |
|||
elseif strcmp(winType, 'SIN') |
|||
frameT(1:448, :) = 0; |
|||
frameT(449:576, :) = bsxfun(@times, frameT(449:576, :), sinWindowShort(1:128)'); |
|||
frameT(1025:end, :) = bsxfun(@times, frameT(1025:end, :), sinWindowLong(1025:end)'); |
|||
else |
|||
error('filterbank, l[20]: Unsupported window type input!') |
|||
end |
|||
elseif strcmp(frameType, 'ESH') |
|||
for channel=1:2 |
|||
for subFrameIndex = 1:8 |
|||
subFrame = imdct4(frameF((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128, channel)); |
|||
|
|||
if strcmp(winType, 'KBD') |
|||
subFrame = subFrame .* kaiserWindowShort'; |
|||
elseif strcmp(winType, 'SIN') |
|||
subFrame = subFrame .* sinWindowShort'; |
|||
end |
|||
|
|||
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) = ... |
|||
frameT(448 + (subFrameIndex - 1) * 128 + 1:448 + (subFrameIndex + 1) * 128, channel) + subFrame; |
|||
end |
|||
end |
|||
end |
|||
end |
@ -0,0 +1,38 @@ |
|||
function frameFout = iTNS(frameFin, frameType, TNScoeffs) |
|||
%Implementation of the reverse TNS step |
|||
% Usage frameFout = iTNS(frameFin, frameType, TNScoeffs), where: |
|||
% Inputs |
|||
% - frameFin is the frame in the frequency domain, in MDCT coefficients |
|||
% representation containing both channels of the audio stored in an |
|||
% array of dimensions 1024X2 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% - TNScoeffs is the quantized TNS coefficients array of dimensions |
|||
% 4X8 for EIGHT_SHORT_SEQUENCE frames and 4X1 otherwise |
|||
% |
|||
% Output |
|||
% - frameFout is the frame in the frequency domain after Temporal Noise |
|||
% Shaping, in MDCT coefficients representation containing both channels |
|||
% of the audio stored in an array of dimensions 1024X2 |
|||
|
|||
% Declares persistent variable holding the TNS tables and initializes if empty |
|||
persistent TNSTables; |
|||
if isempty(TNSTables) |
|||
TNSTables = load('TableB219.mat'); |
|||
end |
|||
|
|||
if ~strcmp(frameType, 'ESH') |
|||
% Inverses MDCT coefficients filtering |
|||
frameFout = filter(1, [1 TNScoeffs], frameFin); |
|||
else |
|||
for subFrameIndex = 1:8 |
|||
subFrame = frameFin((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128); |
|||
|
|||
% Inverses MDCT coefficients filtering |
|||
frameFout((subFrameIndex - 1) * 128 + 1:subFrameIndex * 128) = ... |
|||
filter(1, [1 TNScoeffs((subFrameIndex - 1) * 4 + 1:subFrameIndex * 4)], subFrame); |
|||
end |
|||
end |
|||
end |
@ -0,0 +1,61 @@ |
|||
|
|||
function y = imdct4(x) |
|||
% IMDCT4 Calculates the Modified Discrete Cosine Transform |
|||
% y = imdct4(x) |
|||
% |
|||
% x: input signal (can be either a column or frame per column) |
|||
% y: IMDCT of x |
|||
% |
|||
% Vectorize ! ! ! |
|||
|
|||
% ------- imdct4.m ----------------------------------------- |
|||
% Marios Athineos, marios@ee.columbia.edu |
|||
% http://www.ee.columbia.edu/~marios/ |
|||
% Copyright (c) 2002 by Columbia University. |
|||
% All rights reserved. |
|||
% ---------------------------------------------------------- |
|||
|
|||
[flen,fnum] = size(x); |
|||
% Make column if it's a single row |
|||
if (flen==1) |
|||
x = x(:); |
|||
flen = fnum; |
|||
fnum = 1; |
|||
end |
|||
|
|||
% We need these for furmulas below |
|||
N = flen; |
|||
M = N/2; |
|||
twoN = 2*N; |
|||
sqrtN = sqrt(twoN); |
|||
|
|||
% We need this twice so keep it around |
|||
t = (0:(M-1)).'; |
|||
w = diag(sparse(exp(-j*2*pi*(t+1/8)/twoN))); |
|||
|
|||
% Pre-twiddle |
|||
t = (0:(M-1)).'; |
|||
c = x(2*t+1,:) + j*x(N-1-2*t+1,:); |
|||
c = (0.5*w)*c; |
|||
|
|||
% FFT for N/2 points only !!! |
|||
c = fft(c,M); |
|||
|
|||
% Post-twiddle |
|||
c = ((8/sqrtN)*w)*c; |
|||
|
|||
% Preallocate rotation matrix |
|||
rot = zeros(twoN,fnum); |
|||
|
|||
% Sort |
|||
t = (0:(M-1)).'; |
|||
rot(2*t+1,:) = real(c(t+1,:)); |
|||
rot(N+2*t+1,:) = imag(c(t+1,:)); |
|||
t = (1:2:(twoN-1)).'; |
|||
rot(t+1,:) = -rot(twoN-1-t+1,:); |
|||
|
|||
% Shift |
|||
t = (0:(3*M-1)).'; |
|||
y(t+1,:) = rot(t+M+1,:); |
|||
t = (3*M:(twoN-1)).'; |
|||
y(t+1,:) = -rot(t-3*M+1,:); |
@ -0,0 +1,50 @@ |
|||
function huffLUT = loadLUT() |
|||
load huffCodebooks |
|||
load huffCodebookSF |
|||
huffCodebooks{end + 1} = huffCodebookSF; |
|||
for i=1:12 |
|||
h=huffCodebooks{i}(:,3); |
|||
hlength=huffCodebooks{i}(:,2); |
|||
for j=1:length(h) |
|||
hbin{j}=dec2bin(h(j),hlength(j)); |
|||
end |
|||
invTable{i}=vlcTable(hbin); |
|||
clear hbin; |
|||
end |
|||
huffLUT{1}=struct('LUT',huffCodebooks{1},'invTable',invTable{1},'codebook',1,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); |
|||
huffLUT{2}=struct('LUT',huffCodebooks{2},'invTable',invTable{2},'codebook',2,'nTupleSize',4,'maxAbsCodeVal',1,'signedValues',1); |
|||
huffLUT{3}=struct('LUT',huffCodebooks{3},'invTable',invTable{3},'codebook',3,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); |
|||
huffLUT{4}=struct('LUT',huffCodebooks{4},'invTable',invTable{4},'codebook',4,'nTupleSize',4,'maxAbsCodeVal',2,'signedValues',0); |
|||
huffLUT{5}=struct('LUT',huffCodebooks{5},'invTable',invTable{5},'codebook',5,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); |
|||
huffLUT{6}=struct('LUT',huffCodebooks{6},'invTable',invTable{6},'codebook',6,'nTupleSize',2,'maxAbsCodeVal',4,'signedValues',1); |
|||
huffLUT{7}=struct('LUT',huffCodebooks{7},'invTable',invTable{7},'codebook',7,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); |
|||
huffLUT{8}=struct('LUT',huffCodebooks{8},'invTable',invTable{8},'codebook',8,'nTupleSize',2,'maxAbsCodeVal',7,'signedValues',0); |
|||
huffLUT{9}=struct('LUT',huffCodebooks{9},'invTable',invTable{9},'codebook',9,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); |
|||
huffLUT{10}=struct('LUT',huffCodebooks{10},'invTable',invTable{10},'codebook',10,'nTupleSize',2,'maxAbsCodeVal',12,'signedValues',0); |
|||
huffLUT{11}=struct('LUT',huffCodebooks{11},'invTable',invTable{11},'codebook',11,'nTupleSize',2,'maxAbsCodeVal',16,'signedValues',0); |
|||
huffLUT{12}=struct('LUT',huffCodebooks{12},'invTable',invTable{12},'codebook',12,'nTupleSize',1,'maxAbsCodeVal',60,'signedValues',1); |
|||
|
|||
function [h]=vlcTable(codeArray) |
|||
% Generates the inverse variable length coding tree, stored in a table. It |
|||
% is used for decoding. |
|||
% |
|||
% codeArray: the input matrix, in the form of a cell array, with each cell |
|||
% containing codewords represented in string format (array of chars) |
|||
h=zeros(1,3); |
|||
for codeIndex=1:length(codeArray) |
|||
word=str2num([codeArray{codeIndex}]')'; |
|||
hIndex=1; |
|||
hLength=size(h,1); |
|||
for i=1:length(word) |
|||
k=word(i)+1; |
|||
if(~h(hIndex,k)) |
|||
h(hIndex,k)=hLength+1; |
|||
hIndex=hLength+1; |
|||
h(hIndex,1:2)=zeros(1,2); |
|||
hLength=hLength+1; |
|||
else |
|||
hIndex=h(hIndex,k); |
|||
end |
|||
end |
|||
h(hIndex,3)=codeIndex; |
|||
end |
@ -0,0 +1,75 @@ |
|||
|
|||
function y = mdct4(x) |
|||
% MDCT4 Calculates the Modified Discrete Cosine Transform |
|||
% y = mdct4(x) |
|||
% |
|||
% Use either a Sine or a Kaiser-Bessel Derived window (KBDWin)with |
|||
% 50% overlap for perfect TDAC reconstruction. |
|||
% Remember that MDCT coefs are symmetric: y(k)=-y(N-k-1) so the full |
|||
% matrix (N) of coefs is: yf = [y;-flipud(y)]; |
|||
% |
|||
% x: input signal (can be either a column or frame per column) |
|||
% length of x must be a integer multiple of 4 (each frame) |
|||
% y: MDCT of x (coefs are divided by sqrt(N)) |
|||
% |
|||
% Vectorize ! ! ! |
|||
|
|||
% ------- mdct4.m ------------------------------------------ |
|||
% Marios Athineos, marios@ee.columbia.edu |
|||
% http://www.ee.columbia.edu/~marios/ |
|||
% Copyright (c) 2002 by Columbia University. |
|||
% All rights reserved. |
|||
% ---------------------------------------------------------- |
|||
|
|||
[flen,fnum] = size(x); |
|||
% Make column if it's a single row |
|||
if (flen==1) |
|||
x = x(:); |
|||
flen = fnum; |
|||
fnum = 1; |
|||
end |
|||
% Make sure length is multiple of 4 |
|||
if (rem(flen,4)~=0) |
|||
error('MDCT4 defined for lengths multiple of four.'); |
|||
end |
|||
|
|||
% We need these for furmulas below |
|||
N = flen; % Length of window |
|||
M = N/2; % Number of coefficients |
|||
N4 = N/4; % Simplify the way eqs look |
|||
sqrtN = sqrt(N); |
|||
|
|||
% Preallocate rotation matrix |
|||
% It would be nice to be able to do it in-place but we cannot |
|||
% cause of the prerotation. |
|||
rot = zeros(flen,fnum); |
|||
|
|||
% Shift |
|||
t = (0:(N4-1)).'; |
|||
rot(t+1,:) = -x(t+3*N4+1,:); |
|||
t = (N4:(N-1)).'; |
|||
rot(t+1,:) = x(t-N4+1,:); |
|||
clear x; |
|||
|
|||
% We need this twice so keep it around |
|||
t = (0:(N4-1)).'; |
|||
w = diag(sparse(exp(-j*2*pi*(t+1/8)/N))); |
|||
|
|||
% Pre-twiddle |
|||
t = (0:(N4-1)).'; |
|||
c = (rot(2*t+1,:)-rot(N-1-2*t+1,:))... |
|||
-j*(rot(M+2*t+1,:)-rot(M-1-2*t+1,:)); |
|||
% This is a really cool Matlab trick ;) |
|||
c = 0.5*w*c; |
|||
clear rot; |
|||
|
|||
% FFT for N/4 points only !!! |
|||
c = fft(c,N4); |
|||
|
|||
% Post-twiddle |
|||
c = (2/sqrtN)*w*c; |
|||
|
|||
% Sort |
|||
t = (0:(N4-1)).'; |
|||
y(2*t+1,:) = real(c(t+1,:)); |
|||
y(M-1-2*t+1,:) = -imag(c(t+1,:)); |
@ -0,0 +1,23 @@ |
|||
function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) |
|||
%Implementation of Psychoacoustic Model |
|||
% Usage SMR = psycho(frameT, frameType, frameTprev1, frameTprev2), where: |
|||
% Inputs |
|||
% - frameT is a frame in the time domain, containing only one of the |
|||
% audio channels stored in a vector of length 2048 |
|||
% - frameType is the type of the current frame in string |
|||
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS" |
|||
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS" |
|||
% (LONG_STOP_SEQUENCE) |
|||
% - frameTprev1 is the previous frame in the time domain, containing |
|||
% only one of the audio channels stored in a vector of length 2048 |
|||
% - frameTprev2 is the frame before frameTprev1 in the time domain, |
|||
% containing only one of the audio channels stored in a vector of |
|||
% length 2048 |
|||
% |
|||
% Output |
|||
% - SMR is the signal to mask ratio array of dimensions 42X8 for |
|||
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise |
|||
|
|||
|
|||
end |
|||
|
Loading…
Reference in new issue