You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
232 lines
10 KiB
232 lines
10 KiB
function SMR = psycho(frameT, frameType, frameTprev1, frameTprev2)
|
|
%Implementation of Psychoacoustic Model
|
|
% Usage SMR = psycho(frameT, frameType, frameTprev1, frameTprev2), where:
|
|
% Inputs
|
|
% - frameT is a frame in the time domain, containing only one of the
|
|
% audio channels stored in a vector of length 2048
|
|
% - frameType is the type of the current frame in string
|
|
% representation, can be one of "OLS" (ONLY_LONG_SEQUENCE), "LSS"
|
|
% (LONG_START_SEQUENCE), "ESH" (EIGHT_SHORT_SEQUENCE), "LPS"
|
|
% (LONG_STOP_SEQUENCE)
|
|
% - frameTprev1 is the previous frame in the time domain, containing
|
|
% only one of the audio channels stored in a vector of length 2048
|
|
% - frameTprev2 is the frame before frameTprev1 in the time domain,
|
|
% containing only one of the audio channels stored in a vector of
|
|
% length 2048
|
|
%
|
|
% Output
|
|
% - SMR is the signal to mask ratio array of dimensions 42X8 for
|
|
% EIGHT_SHORT_SEQUENCE frames and 69X1 otherwise
|
|
|
|
% Declares constant numbers of bands for long and short windows
|
|
LONG_WINDOW_NUMBER_OF_BANDS = 69;
|
|
SHORT_WINDOW_NUMBER_OF_BANDS = 42;
|
|
% Declares constant noise masking tone and tone masking noise decibels
|
|
NOISE_MASKING_TONE = 6;
|
|
TONE_MASKING_NOISE = 18;
|
|
|
|
% Declares persistent variable holding the TNS tables and initializes if empty
|
|
persistent TNSTables spreadingLong spreadingShort hannLong hannShort;
|
|
if isempty(TNSTables) || isempty(spreadingLong) || ...
|
|
isempty(spreadingShort) || isempty(hannLong) || isempty(hannShort)
|
|
TNSTables = load('TableB219.mat');
|
|
|
|
spreadingLong = zeros(LONG_WINDOW_NUMBER_OF_BANDS);
|
|
spreadingShort = zeros(SHORT_WINDOW_NUMBER_OF_BANDS);
|
|
|
|
% Calculates all possible spreading function results for long
|
|
% windows
|
|
tmpx = repmat(TNSTables.B219a(:, 5)', LONG_WINDOW_NUMBER_OF_BANDS, 1);
|
|
tmpx = tmpx - repmat(TNSTables.B219a(:, 5), 1, LONG_WINDOW_NUMBER_OF_BANDS);
|
|
indeces = logical(tril(ones(LONG_WINDOW_NUMBER_OF_BANDS)));
|
|
tmpx(indeces) = tmpx(indeces) .* 3;
|
|
indeces = ~indeces;
|
|
tmpx(indeces) = tmpx(indeces) .* 1.5;
|
|
|
|
tmpz = 8 * min((tmpx - 0.5) .^ 2 - 2 * (tmpx - 0.5), 0);
|
|
tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * sqrt(1 + (tmpx + 0.474) .^ 2);
|
|
|
|
tmpSum = tmpz + tmpy;
|
|
spreadingLong(tmpy >= -100) = 10 .^ (tmpSum(tmpy >= -100) ./ 10);
|
|
|
|
% Calculates all possible spreading function results for short
|
|
% windows
|
|
tmpx = repmat(TNSTables.B219b(:, 5)', SHORT_WINDOW_NUMBER_OF_BANDS, 1);
|
|
tmpx = tmpx - repmat(TNSTables.B219b(:, 5), 1, SHORT_WINDOW_NUMBER_OF_BANDS);
|
|
indeces = logical(tril(ones(SHORT_WINDOW_NUMBER_OF_BANDS)));
|
|
tmpx(indeces) = tmpx(indeces) .* 3;
|
|
indeces = ~indeces;
|
|
tmpx(indeces) = tmpx(indeces) .* 1.5;
|
|
|
|
tmpz = 8 * min((tmpx - 0.5) .^ 2 - 2 * (tmpx - 0.5), 0);
|
|
tmpy = 15.811389 + 7.5 * (tmpx + 0.474) - 17.5 * sqrt(1 + (tmpx + 0.474) .^ 2);
|
|
|
|
tmpSum = tmpz + tmpy;
|
|
spreadingShort(tmpy >= -100) = 10 .^ (tmpSum(tmpy >= -100) ./ 10);
|
|
|
|
hannLong = 0.5 - 0.5 * cos(pi * ((0:2047) + 0.5) / 1024)';
|
|
hannShort = 0.5 - 0.5 * cos(pi * ((0:255) + 0.5) / 128)';
|
|
|
|
clearvars tmpx tmpz tmpy
|
|
end
|
|
|
|
if ~strcmp(frameType, 'ESH')
|
|
% Applies window to the frames
|
|
windowedFrameT = frameT .* hannLong;
|
|
windowedFrameTprev1 = frameTprev1 .* hannLong;
|
|
windowedFrameTprev2 = frameTprev2 .* hannLong;
|
|
|
|
% Calculates the FFT of each frame
|
|
frameF = fft(windowedFrameT);
|
|
frameFMag = abs(frameF(1:1024));
|
|
frameFPhase = angle(frameF(1:1024));
|
|
|
|
frameFrameFprev1 = fft(windowedFrameTprev1);
|
|
frameFrameFprev1Mag = abs(frameFrameFprev1(1:1024));
|
|
frameFrameFprev1Phase = angle(frameFrameFprev1(1:1024));
|
|
|
|
frameFrameFprev2 = fft(windowedFrameTprev2);
|
|
frameFrameFprev2Mag = abs(frameFrameFprev2(1:1024));
|
|
frameFrameFprev2Phase = angle(frameFrameFprev2(1:1024));
|
|
|
|
% Calculates the predicted magnitude and phase compontents of each
|
|
% frequency
|
|
magPred = 2 .* frameFrameFprev1Mag - frameFrameFprev2Mag;
|
|
phasePred = 2 .* frameFrameFprev1Phase - frameFrameFprev2Phase;
|
|
|
|
% Calculates this frame's predictability
|
|
framePredictability = sqrt((frameFMag .* cos(frameFPhase) - ...
|
|
magPred .* cos(phasePred)) .^ 2 + ...
|
|
(frameFMag .* sin(frameFPhase) - ...
|
|
magPred .* sin(phasePred)) .^ 2) ./ ...
|
|
(frameFMag + abs(magPred));
|
|
|
|
% Calculates the energy and weighted predictability in the
|
|
% threshold calculation partitions
|
|
bandEnergy(LONG_WINDOW_NUMBER_OF_BANDS, 1) = 0;
|
|
bandPredictability(LONG_WINDOW_NUMBER_OF_BANDS, 1) = 0;
|
|
for band = 1:LONG_WINDOW_NUMBER_OF_BANDS
|
|
bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219a(band, 2) + 1: ...
|
|
TNSTables.B219a(band, 3) + 1));
|
|
bandPredictability(band) = sum(frameFMag( ...
|
|
TNSTables.B219a(band, 2) + 1:TNSTables.B219a(band, 3) + 1) .^ 2 .* ...
|
|
framePredictability(TNSTables.B219a(band, 2) + 1: ...
|
|
TNSTables.B219a(band, 3) + 1));
|
|
end
|
|
|
|
% Convolves the partitioned energy and predictability with the
|
|
% spreading function
|
|
bandEnergyConv = sum(bandEnergy .* spreadingLong, 2);
|
|
bandPredictabilityConv = sum(bandPredictability .* spreadingLong, 2);
|
|
|
|
% Renormalizes values
|
|
bandPredictabilityConv = bandPredictabilityConv ./ bandEnergyConv;
|
|
bandEnergyConv = bandEnergyConv ./ sum(spreadingLong, 1)';
|
|
|
|
% Calculates the tonality index
|
|
tonalIndex = -0.299 - 0.43 .* log(bandPredictabilityConv);
|
|
tonalIndex(tonalIndex < 0) = 0;
|
|
tonalIndex(tonalIndex > 1) = 1;
|
|
|
|
% Calculates SNR and converts from dB to power ratio
|
|
signalToNoiseRatio = tonalIndex .* TONE_MASKING_NOISE + ...
|
|
(1 - tonalIndex) .* NOISE_MASKING_TONE;
|
|
powerRatio = 10 .^ (-signalToNoiseRatio ./ 10);
|
|
|
|
% Calculates the energy threshold
|
|
energyThreshold = bandEnergyConv .* powerRatio;
|
|
|
|
% Calculates the noise level
|
|
qThrN = eps() * 1024 .* 10 .^ (TNSTables.B219a(:, 6) ./ 10);
|
|
noiseLevel = max(energyThreshold, qThrN);
|
|
|
|
SMR = bandEnergy ./ noiseLevel;
|
|
else
|
|
% Splits the frame into sub-frames
|
|
[subFramesT, ~] = buffer(frameT(449:end-448), 256, 128, 'nodelay');
|
|
[subFramesTprev1, ~] = buffer(frameTprev1(449:end-448), 256, 128, 'nodelay');
|
|
|
|
bandEnergy(SHORT_WINDOW_NUMBER_OF_BANDS, 1) = 0;
|
|
bandPredictability(SHORT_WINDOW_NUMBER_OF_BANDS, 1) = 0;
|
|
qThrN = eps() * 128 .* 10 .^ (TNSTables.B219b(:, 6) ./ 10);
|
|
SMR(SHORT_WINDOW_NUMBER_OF_BANDS, 8) = 0;
|
|
|
|
for subFrameIndex = 1:8
|
|
% Applies window to the frames
|
|
windowedFrameT = subFramesT(:, subFrameIndex) .* hannShort;
|
|
if subFrameIndex == 1
|
|
windowedFrameTprev1 = subFramesTprev1(:, 8) .* hannShort;
|
|
windowedFrameTprev2 = subFramesTprev1(:, 7) .* hannShort;
|
|
elseif subFrameIndex == 2
|
|
windowedFrameTprev1 = subFramesT(:, subFrameIndex - 1) .* hannShort;
|
|
windowedFrameTprev2 = subFramesTprev1(:, 8) .* hannShort;
|
|
else
|
|
windowedFrameTprev1 = subFramesT(:, subFrameIndex - 1) .* hannShort;
|
|
windowedFrameTprev2 = subFramesT(:, subFrameIndex - 1) .* hannShort;
|
|
end
|
|
|
|
% Calculates the FFT of each frame
|
|
frameF = fft(windowedFrameT);
|
|
frameFMag = abs(frameF(1:128));
|
|
frameFPhase = angle(frameF(1:128));
|
|
|
|
frameFrameFprev1 = fft(windowedFrameTprev1);
|
|
frameFrameFprev1Mag = abs(frameFrameFprev1(1:128));
|
|
frameFrameFprev1Phase = angle(frameFrameFprev1(1:128));
|
|
|
|
frameFrameFprev2 = fft(windowedFrameTprev2);
|
|
frameFrameFprev2Mag = abs(frameFrameFprev2(1:128));
|
|
frameFrameFprev2Phase = angle(frameFrameFprev2(1:128));
|
|
|
|
% Calculates the predicted magnitude and phase compontents of each
|
|
% frequency
|
|
magPred = 2 .* frameFrameFprev1Mag - frameFrameFprev2Mag;
|
|
phasePred = 2 .* frameFrameFprev1Phase - frameFrameFprev2Phase;
|
|
|
|
% Calculates this frame's predictability
|
|
framePredictability = sqrt((frameFMag .* cos(frameFPhase) - ...
|
|
magPred .* cos(phasePred)) .^ 2 + ...
|
|
(frameFMag .* sin(frameFPhase) - ...
|
|
magPred .* sin(phasePred)) .^ 2) ./ ...
|
|
(frameFMag + abs(magPred));
|
|
|
|
% Calculates the energy and weighted predictability in the
|
|
% threshold calculation partitions
|
|
for band = 1:SHORT_WINDOW_NUMBER_OF_BANDS
|
|
bandEnergy(band) = sumsqr(frameFMag(TNSTables.B219b(band, 2) + 1: ...
|
|
TNSTables.B219b(band, 3) + 1));
|
|
bandPredictability(band) = sum(frameFMag( ...
|
|
TNSTables.B219b(band, 2) + 1:TNSTables.B219b(band, 3) + 1) .^ 2 .* ...
|
|
framePredictability(TNSTables.B219b(band, 2) + 1: ...
|
|
TNSTables.B219b(band, 3) + 1));
|
|
end
|
|
|
|
% Convolves the partitioned energy and predictability with the
|
|
% spreading function
|
|
bandEnergyConv = sum(bandEnergy .* spreadingShort', 2);
|
|
bandPredictabilityConv = sum(bandPredictability .* spreadingShort', 2);
|
|
|
|
% Renormalizes values
|
|
bandPredictabilityConv = bandPredictabilityConv ./ bandEnergyConv;
|
|
bandEnergyConv = bandEnergyConv ./ sum(spreadingShort, 1)';
|
|
|
|
% Calculates the tonality index
|
|
tonalIndex = -0.299 - 0.43 .* log(bandPredictabilityConv);
|
|
tonalIndex(tonalIndex < 0) = 0;
|
|
tonalIndex(tonalIndex > 1) = 1;
|
|
|
|
% Calculates SNR and converts from dB to power ratio
|
|
signalToNoiseRatio = tonalIndex .* TONE_MASKING_NOISE + ...
|
|
(1 - tonalIndex) .* NOISE_MASKING_TONE;
|
|
powerRatio = 10 .^ (-signalToNoiseRatio ./ 10);
|
|
|
|
% Calculates the energy threshold
|
|
energyThreshold = bandEnergyConv .* powerRatio;
|
|
|
|
% Calculates the noise level
|
|
noiseLevel = max(energyThreshold, qThrN);
|
|
|
|
SMR(:, subFrameIndex) = bandEnergy ./ noiseLevel;
|
|
end
|
|
end
|
|
end
|
|
|