From 9a44f99a4d709710e42262608a506a40313accd1 Mon Sep 17 00:00:00 2001 From: Apostolof Date: Sun, 3 Feb 2019 20:28:20 +0200 Subject: [PATCH] Init psycho --- Level_3/AACoder3.m | 22 +++++++--- Level_3/psycho.m | 105 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 9 deletions(-) diff --git a/Level_3/AACoder3.m b/Level_3/AACoder3.m index bfc0156..547d09a 100644 --- a/Level_3/AACoder3.m +++ b/Level_3/AACoder3.m @@ -73,18 +73,26 @@ function AACSeq3 = AACoder3(fNameIn, fnameAACoded) if i < 2 % TODO: what happens on the first two frames? + SL = frameF(:, 1); + SR = frameF(:, 2); + continue; else prev1FrameStart = (i - 1) * 1024 + 1; prev1FrameStop = prev1FrameStart + 2047; prev2FrameStart = (i - 2) * 1024 + 1; - prev2FrameStop = prev1FrameStart + 2047; - SMR = psycho(... - originalAudioData(currFrameStart:currFrameStop, :), ... + prev2FrameStop = prev2FrameStart + 2047; + SMRL = psycho(... + originalAudioData(currFrameStart:currFrameStop, 1), ... frameTypes{i+1}, ... - originalAudioData(prev1FrameStart:prev1FrameStop, :), ... - originalAudioData(prev2FrameStart:prev2FrameStop, :)); - [SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMR); - [SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMR); + originalAudioData(prev1FrameStart:prev1FrameStop, 1), ... + originalAudioData(prev2FrameStart:prev2FrameStop, 1)); + SMRR = psycho(... + originalAudioData(currFrameStart:currFrameStop, 2), ... + frameTypes{i+1}, ... + originalAudioData(prev1FrameStart:prev1FrameStop, 2), ... + originalAudioData(prev2FrameStart:prev2FrameStop, 2)); + [SL, sfcL, GL] = AACquantizer(frameF, frameTypes{i+1}, SMRL); + [SR, sfcR, GR] = AACquantizer(frameF, frameTypes{i+1}, SMRR); end [streamL, huffcodebookL] = encodeHuff(SL, huffLUT); diff --git a/Level_3/psycho.m b/Level_3/psycho.m index 24e79e5..6b9c0b0 100644 --- a/Level_3/psycho.m +++ b/Level_3/psycho.m @@ -18,6 +18,107 @@ function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2) % - SMR is the signal to mask ratio array of dimensions 42X8 for % EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise - -end + % Declares constant numbers of bands for long and short windows + LONG_WINDOW_NUMBER_OF_BANDS = 69; + SHORT_WINDOW_NUMBER_OF_BANDS = 42; + + % Declares persistent variable holding the TNS tables and initializes if empty + persistent TNSTables spreadingLong spreadingShort hannLong hannShort; + if isempty(TNSTables) || isempty(spreadingLong) || ... + isempty(spreadingShort) || isempty(hannLong) || isempty(hannShort) + TNSTables = load('TableB219.mat'); + + spreadingLong = zeros(LONG_WINDOW_NUMBER_OF_BANDS); + spreadingShort = zeros(SHORT_WINDOW_NUMBER_OF_BANDS); + + % Calculates all possible spreading function results for long + % windows + tmpx = repmat(TNSTables.B219a(:, 5)', LONG_WINDOW_NUMBER_OF_BANDS, 1); + tmpx = tmpx - repmat(TNSTables.B219a(:, 5), 1, LONG_WINDOW_NUMBER_OF_BANDS); + indeces = logical(tril(ones(LONG_WINDOW_NUMBER_OF_BANDS))); + tmpx(indeces) = tmpx(indeces) .* 3; + indeces = ~indeces; + tmpx(indeces) = tmpx(indeces) .* 1.5; + + tmpz = 8 * min((tmpx - 0.5) .^ 2 - 2 * (tmpx - 0.5), 0); + tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * sqrt(1 + (tmpx + 0.474) .^ 2); + + tmpSum = tmpz + tmpy; + spreadingLong(tmpy >= -100) = 10 .^ (tmpSum(tmpy >= -100) ./ 10); + + % Calculates all possible spreading function results for short + % windows + tmpx = repmat(TNSTables.B219b(:, 5)', SHORT_WINDOW_NUMBER_OF_BANDS, 1); + tmpx = tmpx - repmat(TNSTables.B219b(:, 5), 1, SHORT_WINDOW_NUMBER_OF_BANDS); + indeces = logical(tril(ones(SHORT_WINDOW_NUMBER_OF_BANDS))); + tmpx(indeces) = tmpx(indeces) .* 3; + indeces = ~indeces; + tmpx(indeces) = tmpx(indeces) .* 1.5; + + tmpz = 8 * min((tmpx - 0.5) .^ 2 - 2 * (tmpx - 0.5), 0); + tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * sqrt(1 + (tmpx + 0.474) .^ 2); + + tmpSum = tmpz + tmpy; + spreadingShort(tmpy >= -100) = 10 .^ (tmpSum(tmpy >= -100) ./ 10); + + hannLong = 0.5 - 0.5 * cos((pi * (0:2047 + 0.5)) / 1024); + hannShort = 0.5 - 0.5 * cos((pi * (0:255 + 0.5)) / 128); + end + if ~strcmp(frameType, 'ESH') + % Applies window to the frames + windowedFrameT = frameT .* hannLong'; + windowedFrameTprev1 = frameTprev1 .* hannLong'; + windowedFrameTprev2 = frameTprev2 .* hannLong'; + + % Calculates the FFT of each frame + frameF = fft(windowedFrameT); + frameFMag = 2 * abs(frameF(1:1024)); + frameFPhase = angle(frameF(1:1024)); + + frameFrameFprev1 = fft(windowedFrameTprev1); + frameFrameFprev1Mag = 2 * abs(frameFrameFprev1(1:1024)); + frameFrameFprev1Phase = angle(frameFrameFprev1(1:1024)); + + frameFrameFprev2 = fft(windowedFrameTprev2); + frameFrameFprev2Mag = 2 * abs(frameFrameFprev2(1:1024)); + frameFrameFprev2Phase = angle(frameFrameFprev2(1:1024)); + + % Calculates the predicted magnitude and phase compontents of each + % frequency + magPred = 2 .* frameFrameFprev1Mag - frameFrameFprev2Mag; + phasePred = 2 .* frameFrameFprev1Phase - frameFrameFprev2Phase; + + % Calculates this frame's predictability + framePredictability = sqrt((frameFMag .* cos(frameFPhase) - ... + magPred .* cos(phasePred)) .^ 2 + ... + (frameFMag .* sin(frameFPhase) - ... + magPred .* sin(phasePred)) .^ 2) ./ ... + (frameFMag + abs(magPred)); + + % Calculates the energy and weighted predictability in the + % threshold calculation partitions + bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS) = 0; + bandPredictability(LONG_WINDOW_NUMBER_OF_BANDS) = 0; + for band = 1:LONG_WINDOW_NUMBER_OF_BANDS + bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219a(band, 2) + 1: ... + TNSTables.B219a(band, 3) + 1)); + bandPredictability(band) = sumsqr(frameFMag( ... + TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .* ... + framePredictability(TNSTables.B219a(band, 2) + 1: ... + TNSTables.B219a(band, 3) + 1)); + end + + % Convolves the partitioned energy and predictability with the + % spreading function + ecb = sum(bandEnergy .* spreadingLong', 2); + ct = sum(bandPredictability .* spreadingLong', 2); + + % Renormalizes values + cb = ct ./ ecb; + en = ecb ./ sum(spreadingLong, 1); + + % Calculates the tonality index + tb = -0.299 - 0.43 .* log(cb); + end +end