Browse Source

Complete psycho for non ESH frames, Various fixes

master
Apostolos Fanakis 6 years ago
parent
commit
e520bcf0ac
  1. 25
      Level_3/AACoder3.m
  2. 41
      Level_3/psycho.m

25
Level_3/AACoder3.m

@ -40,6 +40,16 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
% Declares constant window type % Declares constant window type
WINDOW_TYPE = 'SIN'; WINDOW_TYPE = 'SIN';
% Declares constant numbers of bands for long and short windows
LONG_WINDOW_NUMBER_OF_BANDS = 69;
SHORT_WINDOW_NUMBER_OF_BANDS = 42;
% Declares persistent variable holding the TNS tables and initializes if empty
persistent TNSTables;
if isempty(TNSTables)
TNSTables = load('TableB219.mat');
end
% Reads the audio file % Reads the audio file
[originalAudioData, ~] = audioread(fNameIn); [originalAudioData, ~] = audioread(fNameIn);
@ -64,6 +74,8 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
% Encodes audio file % Encodes audio file
huffLUT = loadLUT(); huffLUT = loadLUT();
AACSeq3(length(frameTypes)) = struct; AACSeq3(length(frameTypes)) = struct;
TL(LONG_WINDOW_NUMBER_OF_BANDS) = 0;
TR(LONG_WINDOW_NUMBER_OF_BANDS) = 0;
for i = 0:length(frameTypes) - 1 for i = 0:length(frameTypes) - 1
currFrameStart = i * 1024 + 1; currFrameStart = i * 1024 + 1;
currFrameStop = currFrameStart + 2047; currFrameStop = currFrameStart + 2047;
@ -95,6 +107,15 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
[SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMRR); [SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMRR);
end end
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS
TL(band) = sumsqr(frameF((TNSTables.B219a(band, 2) + 1: ...
TNSTables.B219a(band, 3) + 1), 1));
TR(band) = sumsqr(frameF((TNSTables.B219a(band, 2) + 1: ...
TNSTables.B219a(band, 3) + 1), 2));
end
TL = TL ./ SMRL;
TR = TR ./ SMRR;
[streamL, huffcodebookL] = encodeHuff(SL, huffLUT); [streamL, huffcodebookL] = encodeHuff(SL, huffLUT);
[streamR, huffcodebookR] = encodeHuff(SR, huffLUT); [streamR, huffcodebookR] = encodeHuff(SR, huffLUT);
[sfcL, ~] = encodeHuff(sfcL, huffLUT, 12); [sfcL, ~] = encodeHuff(sfcL, huffLUT, 12);
@ -104,8 +125,8 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded)
AACSeq3(i + 1).winType = WINDOW_TYPE; AACSeq3(i + 1).winType = WINDOW_TYPE;
AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL; AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL;
AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR; AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR;
AACSeq3(i + 1).chl.T = what; % TODO: find dis shit AACSeq3(i + 1).chl.T = TL;
AACSeq3(i + 1).chr.T = what; AACSeq3(i + 1).chr.T = TR;
AACSeq3(i + 1).chl.G = GL; AACSeq3(i + 1).chl.G = GL;
AACSeq3(i + 1).chr.G = GR; AACSeq3(i + 1).chr.G = GR;
AACSeq3(i + 1).chl.sfc = sfcL; AACSeq3(i + 1).chl.sfc = sfcL;

41
Level_3/psycho.m

@ -21,6 +21,9 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
% Declares constant numbers of bands for long and short windows % Declares constant numbers of bands for long and short windows
LONG_WINDOW_NUMBER_OF_BANDS = 69; LONG_WINDOW_NUMBER_OF_BANDS = 69;
SHORT_WINDOW_NUMBER_OF_BANDS = 42; SHORT_WINDOW_NUMBER_OF_BANDS = 42;
% Declares constant noise masking tone and tone masking noise decibels
NOISE_MASKING_TONE = 6;
TONE_MASKING_NOISE = 18;
% Declares persistent variable holding the TNS tables and initializes if empty % Declares persistent variable holding the TNS tables and initializes if empty
persistent TNSTables spreadingLong spreadingShort hannLong hannShort; persistent TNSTables spreadingLong spreadingShort hannLong hannShort;
@ -63,6 +66,8 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
hannLong = 0.5 - 0.5 * cos((pi * (0:2047 + 0.5)) / 1024); hannLong = 0.5 - 0.5 * cos((pi * (0:2047 + 0.5)) / 1024);
hannShort = 0.5 - 0.5 * cos((pi * (0:255 + 0.5)) / 128); hannShort = 0.5 - 0.5 * cos((pi * (0:255 + 0.5)) / 128);
clearvars tmpx tmpz tmpy
end end
if ~strcmp(frameType, 'ESH') if ~strcmp(frameType, 'ESH')
@ -73,15 +78,15 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
% Calculates the FFT of each frame % Calculates the FFT of each frame
frameF = fft(windowedFrameT); frameF = fft(windowedFrameT);
frameFMag = 2 * abs(frameF(1:1024)); frameFMag = abs(frameF(1:1024));
frameFPhase = angle(frameF(1:1024)); frameFPhase = angle(frameF(1:1024));
frameFrameFprev1 = fft(windowedFrameTprev1); frameFrameFprev1 = fft(windowedFrameTprev1);
frameFrameFprev1Mag = 2 * abs(frameFrameFprev1(1:1024)); frameFrameFprev1Mag = abs(frameFrameFprev1(1:1024));
frameFrameFprev1Phase = angle(frameFrameFprev1(1:1024)); frameFrameFprev1Phase = angle(frameFrameFprev1(1:1024));
frameFrameFprev2 = fft(windowedFrameTprev2); frameFrameFprev2 = fft(windowedFrameTprev2);
frameFrameFprev2Mag = 2 * abs(frameFrameFprev2(1:1024)); frameFrameFprev2Mag = abs(frameFrameFprev2(1:1024));
frameFrameFprev2Phase = angle(frameFrameFprev2(1:1024)); frameFrameFprev2Phase = angle(frameFrameFprev2(1:1024));
% Calculates the predicted magnitude and phase components of each % Calculates the predicted magnitude and phase components of each
@ -103,22 +108,38 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS for band = 1:LONG_WINDOW_NUMBER_OF_BANDS
bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219a(band, 2) + 1: ... bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219a(band, 2) + 1: ...
TNSTables.B219a(band, 3) + 1)); TNSTables.B219a(band, 3) + 1));
bandPredictability(band) = sumsqr(frameFMag( ... bandPredictability(band) = sum(frameFMag( ...
TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .* ... TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .^ 2 .* ...
framePredictability(TNSTables.B219a(band, 2) + 1: ... framePredictability(TNSTables.B219a(band, 2) + 1: ...
TNSTables.B219a(band, 3) + 1)); TNSTables.B219a(band, 3) + 1));
end end
% Convolves the partitioned energy and predictability with the % Convolves the partitioned energy and predictability with the
% spreading function % spreading function
ecb = sum(bandEnergy .* spreadingLong', 2); bandEnergyConv = sum(bandEnergy .* spreadingLong', 2);
ct = sum(bandPredictability .* spreadingLong', 2); bandPredictabilityConv = sum(bandPredictability .* spreadingLong', 2);
% Renormalizes values % Renormalizes values
cb = ct ./ ecb; bandPredictabilityConv = bandPredictabilityConv ./ bandEnergyConv;
en = ecb ./ sum(spreadingLong, 1); bandEnergyConv = bandEnergyConv ./ sum(spreadingLong, 1)';
% Calculates the tonality index % Calculates the tonality index
tb = -0.299 - 0.43 .* log(cb); tonalIndex = -0.299 - 0.43 .* log(bandPredictabilityConv);
tonalIndex(tonalIndex < 0) = 0;
tonalIndex(tonalIndex > 1) = 1;
% Calculates SNR and converts from dB to power ratio
signalToNoiseRatio = tonalIndex .* TONE_MASKING_NOISE + ...
(1 - tonalIndex) .* NOISE_MASKING_TONE;
powerRatio = 10 .^ (-signalToNoiseRatio ./ 10);
% Calculates the energy threshold
energyThreshold = bandEnergyConv .* powerRatio;
% Calculates the noise level
qThrN = eps() * 1024 .* 10 .^ (TNSTables.B219a(:, 6) ./ 10);
noiseLevel = max(energyThreshold, qThrN);
SMR = bandEnergy ./ noiseLevel';
end end
end end

Loading…
Cancel
Save