From e520bcf0ac787174680ad4e10b6ae6c6c2e0d864 Mon Sep 17 00:00:00 2001 From: Apostolof Date: Mon, 4 Feb 2019 12:35:57 +0200 Subject: [PATCH] Complete psycho for non ESH frames, Various fixes --- Level_3/AACoder3.m | 25 +++++++++++++++++++++++-- Level_3/psycho.m | 41 +++++++++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/Level_3/AACoder3.m b/Level_3/AACoder3.m index 547d09a..8a7945f 100644 --- a/Level_3/AACoder3.m +++ b/Level_3/AACoder3.m @@ -39,6 +39,16 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded) % Declares constant window type WINDOW_TYPE = 'SIN'; + + % Declares constant numbers of bands for long and short windows + LONG_WINDOW_NUMBER_OF_BANDS = 69; + SHORT_WINDOW_NUMBER_OF_BANDS = 42; + + % Declares persistent variable holding the TNS tables and initializes if empty + persistent TNSTables; + if isempty(TNSTables) + TNSTables = load('TableB219.mat'); + end % Reads the audio file [originalAudioData, ~] = audioread(fNameIn); @@ -64,6 +74,8 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded) % Encodes audio file huffLUT = loadLUT(); AACSeq3(length(frameTypes)) = struct; + TL(LONG_WINDOW_NUMBER_OF_BANDS) = 0; + TR(LONG_WINDOW_NUMBER_OF_BANDS) = 0; for i = 0:length(frameTypes) - 1 currFrameStart = i * 1024 + 1; currFrameStop = currFrameStart + 2047; @@ -95,6 +107,15 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded) [SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMRR); end + for band = 1:LONG_WINDOW_NUMBER_OF_BANDS + TL(band) = sumsqr(frameF((TNSTables.B219a(band, 2) + 1: ... + TNSTables.B219a(band, 3) + 1), 1)); + TR(band) = sumsqr(frameF((TNSTables.B219a(band, 2) + 1: ... + TNSTables.B219a(band, 3) + 1), 2)); + end + TL = TL ./ SMRL; + TR = TR ./ SMRR; + [streamL, huffcodebookL] = encodeHuff(SL, huffLUT); [streamR, huffcodebookR] = encodeHuff(SR, huffLUT); [sfcL, ~] = encodeHuff(sfcL, huffLUT, 12); @@ -104,8 +125,8 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded) AACSeq3(i + 1).winType = WINDOW_TYPE; AACSeq3(i + 1).chl.TNScoeffs = TNScoeffsL; AACSeq3(i + 1).chr.TNScoeffs = TNScoeffsR; - AACSeq3(i + 1).chl.T = what; % TODO: find dis shit - AACSeq3(i + 1).chr.T = what; + AACSeq3(i + 1).chl.T = TL; + AACSeq3(i + 1).chr.T = TR; AACSeq3(i + 1).chl.G = GL; AACSeq3(i + 1).chr.G = GR; AACSeq3(i + 1).chl.sfc = sfcL; diff --git a/Level_3/psycho.m b/Level_3/psycho.m index 6b9c0b0..e1d279f 100644 --- a/Level_3/psycho.m +++ b/Level_3/psycho.m @@ -21,6 +21,9 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) % Declares constant numbers of bands for long and short windows LONG_WINDOW_NUMBER_OF_BANDS = 69; SHORT_WINDOW_NUMBER_OF_BANDS = 42; + % Declares constant noise masking tone and tone masking noise decibels + NOISE_MASKING_TONE = 6; + TONE_MASKING_NOISE = 18; % Declares persistent variable holding the TNS tables and initializes if empty persistent TNSTables spreadingLong spreadingShort hannLong hannShort; @@ -63,6 +66,8 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) hannLong = 0.5 - 0.5 * cos((pi * (0:2047 + 0.5)) / 1024); hannShort = 0.5 - 0.5 * cos((pi * (0:255 + 0.5)) / 128); + + clearvars tmpx tmpz tmpy end if ~strcmp(frameType, 'ESH') @@ -73,15 +78,15 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) % Calculates the FFT of each frame frameF = fft(windowedFrameT); - frameFMag = 2 * abs(frameF(1:1024)); + frameFMag = abs(frameF(1:1024)); frameFPhase = angle(frameF(1:1024)); frameFrameFprev1 = fft(windowedFrameTprev1); - frameFrameFprev1Mag = 2 * abs(frameFrameFprev1(1:1024)); + frameFrameFprev1Mag = abs(frameFrameFprev1(1:1024)); frameFrameFprev1Phase = angle(frameFrameFprev1(1:1024)); frameFrameFprev2 = fft(windowedFrameTprev2); - frameFrameFprev2Mag = 2 * abs(frameFrameFprev2(1:1024)); + frameFrameFprev2Mag = abs(frameFrameFprev2(1:1024)); frameFrameFprev2Phase = angle(frameFrameFprev2(1:1024)); % Calculates the predicted magnitude and phase compontents of each @@ -103,22 +108,38 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) for band = 1:LONG_WINDOW_NUMBER_OF_BANDS bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219a(band, 2) + 1: ... TNSTables.B219a(band, 3) + 1)); - bandPredictability(band) = sumsqr(frameFMag( ... - TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .* ... + bandPredictability(band) = sum(frameFMag( ... + TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .^ 2 .* ... framePredictability(TNSTables.B219a(band, 2) + 1: ... TNSTables.B219a(band, 3) + 1)); end % Convolves the partitioned energy and predictability with the % spreading function - ecb = sum(bandEnergy .* spreadingLong', 2); - ct = sum(bandPredictability .* spreadingLong', 2); + bandEnergyConv = sum(bandEnergy .* spreadingLong', 2); + bandPredictabilityConv = sum(bandPredictability .* spreadingLong', 2); % Renormalizes values - cb = ct ./ ecb; - en = ecb ./ sum(spreadingLong, 1); + bandPredictabilityConv = bandPredictabilityConv ./ bandEnergyConv; + bandEnergyConv = bandEnergyConv ./ sum(spreadingLong, 1)'; % Calculates the tonality index - tb = -0.299 - 0.43 .* log(cb); + tonalIndex = -0.299 - 0.43 .* log(bandPredictabilityConv); + tonalIndex(tonalIndex < 0) = 0; + tonalIndex(tonalIndex > 1) = 1; + + % Calculates SNR and converts from dB to power ratio + signalToNoiseRatio = tonalIndex .* TONE_MASKING_NOISE + ... + (1 - tonalIndex) .* NOISE_MASKING_TONE; + powerRatio = 10 .^ (-signalToNoiseRatio ./ 10); + + % Calculates the energy threshold + energyThreshold = bandEnergyConv .* powerRatio; + + % Calculates the noise level + qThrN = eps() * 1024 .* 10 .^ (TNSTables.B219a(:, 6) ./ 10); + noiseLevel = max(energyThreshold, qThrN); + + SMR = bandEnergy ./ noiseLevel'; end end