You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
370 lines
14 KiB
370 lines
14 KiB
%% AUTHOR[1] : Apostolos Fanakis (8261)
|
|
%% EMAIL[1] : apostolof@auth.gr
|
|
%% AUTHOR[2] : Charalampos Papadiakos (8302)
|
|
%% EMAIL[2] : charaldp@ece.auth.gr
|
|
%% AUTHOR[3] : Hlektra Mitsi ()
|
|
%% EMAIL[3] :
|
|
%% $DATE : 28-December-2018 12:45:00 $
|
|
%% $Revision : 1.00 $
|
|
%% DEVELOPED : 9.0.0.341360 (R2016a)
|
|
%% FILENAME : spike_sorting.m
|
|
%%
|
|
%% =================================================================================================
|
|
%% S.1
|
|
datasetMedians(8) = 0;
|
|
datasetFactors(8) = 0;
|
|
|
|
thresholdFactorInitValue = 3; % k starting value
|
|
thresholdFactorEndValue = 12; % k ending value
|
|
thresholdFactorStep = 0.01; % k jumping step
|
|
numberOfFactors = length(thresholdFactorInitValue:thresholdFactorStep:thresholdFactorEndValue);
|
|
numberOfSpikesPerFactor(numberOfFactors) = 0;
|
|
|
|
for fileIndex=1:8
|
|
fprintf('Loading test dataset no. %d\n', fileIndex);
|
|
filename = sprintf('dataset\\Data_Test_%d.mat', fileIndex);
|
|
Dataset = load(filename);
|
|
data = double(Dataset.data);
|
|
|
|
%% Q.1.1
|
|
figure();
|
|
plot(data(1:10000));
|
|
xlim([0, 10000]);
|
|
title(['First 10000 samples of dataset #' num2str(fileIndex)]);
|
|
xlabel('Sample #');
|
|
ylabel('Trivial Unit'); %TODO: Is this mVolts?
|
|
drawnow;
|
|
|
|
%% Q.1.2
|
|
dataMedian = median(abs(data)/0.6745);
|
|
datasetMedians(fileIndex) = dataMedian;
|
|
|
|
for factorIteration=1:numberOfFactors % runs for each k
|
|
% builds threshold
|
|
thresholdFactor = thresholdFactorInitValue + (factorIteration - 1) * thresholdFactorStep;
|
|
threshold = thresholdFactor * dataMedian;
|
|
|
|
% calculates number of spikes
|
|
sample = 1;
|
|
while sample <= length(data)
|
|
if data(sample) >= threshold
|
|
% spike found
|
|
numberOfSpikesPerFactor(factorIteration) = numberOfSpikesPerFactor(factorIteration) + 1;
|
|
|
|
% skips cheking until values are below threshold again
|
|
while sample <= length(data)
|
|
sample = sample + 1;
|
|
if (data(sample) <= threshold)
|
|
break;
|
|
end
|
|
end
|
|
end
|
|
sample = sample + 1;
|
|
end
|
|
end
|
|
|
|
figure();
|
|
% trims zeros
|
|
numberOfSpikesTrimmed = numberOfSpikesPerFactor(1:find(numberOfSpikesPerFactor,1,'last'));
|
|
endValue = thresholdFactorInitValue + thresholdFactorStep * (length(numberOfSpikesTrimmed) - 1);
|
|
plot(thresholdFactorInitValue:thresholdFactorStep:endValue, numberOfSpikesTrimmed);
|
|
title(['Number of spikes for different values of k for dataset #' num2str(fileIndex)]);
|
|
xlabel('Threshold factor (k)');
|
|
ylabel('Number of spikes');
|
|
hold on;
|
|
plot([thresholdFactorInitValue endValue], [double(Dataset.spikeNum), double(Dataset.spikeNum)]);
|
|
xlim([thresholdFactorInitValue endValue]);
|
|
drawnow;
|
|
hold off;
|
|
|
|
% finds dataset's theshold factor k that produces the closest number of
|
|
% spikes ot the ground truth
|
|
[~, closestIndex] = min(abs(numberOfSpikesPerFactor-Dataset.spikeNum));
|
|
datasetFactors(fileIndex) = thresholdFactorInitValue + (closestIndex - 1) * thresholdFactorStep;
|
|
|
|
clear Dataset
|
|
clear data
|
|
end
|
|
fprintf('\n');
|
|
|
|
%% Q.1.3
|
|
figure();
|
|
plot(datasetMedians, datasetFactors, 'o');
|
|
title('Polynomial curve fitting on median-threshold factor value pairs');
|
|
xlabel('Dataset median');
|
|
ylabel('Threshold factor');
|
|
hold on;
|
|
|
|
empiricalRule = polyfit(datasetMedians, datasetFactors, 8);
|
|
visualizationX = linspace(0, 0.5, 50);
|
|
visualizationY = polyval(empiricalRule, visualizationX);
|
|
plot(visualizationX, visualizationY);
|
|
hold off
|
|
|
|
%% =================================================================================================
|
|
%% S.2
|
|
clearvars = {'closestIndex' 'datasetFactors' 'datasetMedians' 'endValue' 'minValue' ...
|
|
'numberOfFactors' 'numberOfSpikesPerFactor' 'numberOfSpikesTrimmed' 'thresholdFactorEndValue' ...
|
|
'thresholdFactorInitValue' 'thresholdFactorStep' 'visualizationX' 'visualizationY'};
|
|
clear(clearvars{:})
|
|
clear clearvars
|
|
|
|
for fileIndex=1:4
|
|
fprintf('Loading evaluation dataset no. %d \n', fileIndex);
|
|
filename = sprintf('dataset\\Data_Eval_E_%d.mat', fileIndex);
|
|
Dataset = load(filename);
|
|
data = double(Dataset.data);
|
|
|
|
%% Q.2.1 and Q.2.2
|
|
dataMedian = median(abs(data)/0.6745);
|
|
|
|
%factorEstimation = polyval(empiricalRule, dataMedian);
|
|
factorEstimation = 4;
|
|
threshold = factorEstimation * dataMedian;
|
|
numberOfSpikes = 0;
|
|
spikesTimesEst(2500) = 0;
|
|
spikesEst(2500, 64) = 0;
|
|
|
|
figure();
|
|
plot(data(1:10000));
|
|
xlim([0, 10000]);
|
|
title(['First 10000 samples of dataset #' num2str(fileIndex)]);
|
|
xlabel('Sample #');
|
|
ylabel('Trivial Unit'); %TODO: Is this mVolts?
|
|
hold on;
|
|
plot([1 10000], [threshold, threshold]);
|
|
drawnow;
|
|
|
|
% calculates number of spikes
|
|
spikeStartIndex = 1;
|
|
spikeEndIndex = 1;
|
|
sample = 1;
|
|
while sample <= length(data)
|
|
if data(sample) >= threshold
|
|
% spike found
|
|
numberOfSpikes = numberOfSpikes + 1;
|
|
spikeStartIndex = sample;
|
|
|
|
% skips cheking until values are below threshold again
|
|
while sample <= length(data)
|
|
sample = sample + 1;
|
|
if (data(sample) <= threshold)
|
|
% finds the index of the max sample for this spike
|
|
spikeEndIndex = sample;
|
|
[~, relativeMaxIndex] = max(data(spikeStartIndex:spikeEndIndex));
|
|
absoluteMaxIndex = spikeStartIndex - 1 + relativeMaxIndex;
|
|
|
|
% defines an area of -41/+22 samples around the max
|
|
% and searches for the min
|
|
[~, relativeMinIndex] = min(data(absoluteMaxIndex-41:absoluteMaxIndex+22));
|
|
absoluteMinIndex = absoluteMaxIndex - 41 + relativeMinIndex;
|
|
|
|
% discernes the extrema (minimum or maximum) that
|
|
% occurs first
|
|
firstIndex = min([absoluteMaxIndex absoluteMinIndex]);
|
|
|
|
% Q.2.1
|
|
spikesTimesEst(numberOfSpikes) = firstIndex;
|
|
% Q.2.2
|
|
spikesEst(numberOfSpikes, :) = data(firstIndex-34:firstIndex+29);
|
|
break;
|
|
end
|
|
end
|
|
end
|
|
sample = sample + 1;
|
|
end
|
|
|
|
fprintf('%d spikes found for dataset #%d\n', numberOfSpikes, fileIndex);
|
|
fprintf('actual number of spikes = %d\n', length(Dataset.spikeTimes));
|
|
fprintf('diff = %d\n\n', numberOfSpikes - length(Dataset.spikeTimes));
|
|
|
|
figure();
|
|
hold on;
|
|
for spike=1:numberOfSpikes
|
|
plot(1:64, spikesEst(spike, :));
|
|
end
|
|
title(['Spikes of dataset #' num2str(fileIndex) ' aligned at the first extrema']);
|
|
xlabel('Samples');
|
|
ylabel('Trivial Unit');
|
|
xlim([1 64]);
|
|
drawnow;
|
|
hold off;
|
|
|
|
%% Q.2.3
|
|
realSpikeIndex = double(Dataset.spikeTimes);
|
|
numberOfCorrectEstimations = 0;
|
|
numberOfUndetectedSpikes = 0;
|
|
averageEstimationError = 0;
|
|
|
|
correctSpikes(2500, 64) = 0;
|
|
spikeClass(2500) = 0;
|
|
|
|
for trueSpikeIndex=1:length(realSpikeIndex)
|
|
[estimationError, closestIndex] = min(abs(spikesTimesEst-realSpikeIndex(trueSpikeIndex)));
|
|
if estimationError < 32
|
|
numberOfCorrectEstimations = numberOfCorrectEstimations + 1;
|
|
averageEstimationError = averageEstimationError + estimationError;
|
|
|
|
correctSpikes(numberOfCorrectEstimations, :) = spikesEst(closestIndex, :);
|
|
spikeClass(numberOfCorrectEstimations) = double(Dataset.spikeClass(trueSpikeIndex));
|
|
else
|
|
numberOfUndetectedSpikes = numberOfUndetectedSpikes + 1;
|
|
end
|
|
end
|
|
|
|
averageEstimationError = averageEstimationError / numberOfSpikes;
|
|
|
|
fprintf('Number of correct spike detections is %d\n', numberOfCorrectEstimations);
|
|
if numberOfSpikes-numberOfCorrectEstimations > 0
|
|
fprintf('Number of uncorrect spike detections is %d\n', ...
|
|
numberOfSpikes - numberOfCorrectEstimations);
|
|
end
|
|
fprintf('Number of undetected spikes is %d\n', numberOfUndetectedSpikes);
|
|
fprintf('Average error of spike index estimation is %.2f\n\n', averageEstimationError);
|
|
|
|
%% Q.2.4
|
|
features(numberOfCorrectEstimations, 7) = 0;
|
|
pcaCoefficients = pca(correctSpikes, 'NumComponents', 2);
|
|
|
|
for spike=1:numberOfCorrectEstimations
|
|
% finds index of max
|
|
[maxVal, features(spike, 1)] = max(correctSpikes(spike, :));
|
|
% calculates Vpeak-to-peak
|
|
features(spike, 2) = maxVal - min(correctSpikes(spike, :));
|
|
% calculates ZCR
|
|
features(spike, 3) = mean(abs(diff(sign(correctSpikes(spike, :)))));
|
|
% calculates the signal energy
|
|
features(spike, 4) = sum(correctSpikes(spike, :).^2);
|
|
% calculates the fft of the signal
|
|
asud = fft(correctSpikes(spike, :));
|
|
features(spike, 5) = asud(1)^2;
|
|
|
|
features(spike, 6) = correctSpikes(spike, :) * pcaCoefficients(:, 1);
|
|
features(spike, 7) = correctSpikes(spike, :) * pcaCoefficients(:, 2);
|
|
end
|
|
|
|
figure();
|
|
scatter(features(:, 1), features(:, 2));
|
|
title(['Dataset #' num2str(fileIndex) ' feature plot']);
|
|
xlabel('Index of max value');
|
|
ylabel('Peak-to-peak amplitude');
|
|
figure();
|
|
scatter(features(:, 6), features(:, 7));
|
|
title(['Dataset #' num2str(fileIndex) ' feature plot']);
|
|
xlabel('PCA feature 1');
|
|
ylabel('PCA feature 2');
|
|
|
|
%% Q.2.5
|
|
accuracy = MyClassify(features, spikeClass');
|
|
fprintf('Accuracy achieved is %.2f%%\n\n', accuracy);
|
|
|
|
% clustering using DB-SCAN algorithm
|
|
% code for DB-SCAN downloaded from here:
|
|
% https://www.peterkovesi.com/matlabfns/
|
|
if fileIndex == 1
|
|
distThreshold = 0.4;
|
|
minPts = 20;
|
|
elseif fileIndex == 2
|
|
distThreshold = 0.15;
|
|
minPts = 30;
|
|
elseif fileIndex == 3
|
|
distThreshold = 0.32;
|
|
minPts = 30;
|
|
else
|
|
distThreshold = 0.36;
|
|
minPts = 35;
|
|
end
|
|
|
|
[~, dbScanClasses, ~] = dbscan(features(:, 6:7)', distThreshold, minPts);
|
|
%[dbScanClasses, accuracy] = ShuffledClassAccuracy(spikeClass, dbScanClasses);
|
|
% fixes classes enumeration
|
|
if fileIndex == 1
|
|
dbScanClasses(dbScanClasses==1) = 7;
|
|
dbScanClasses(dbScanClasses==3) = 1;
|
|
dbScanClasses(dbScanClasses==7) = 3;
|
|
elseif fileIndex == 3
|
|
dbScanClasses(dbScanClasses==1) = 7;
|
|
dbScanClasses(dbScanClasses==2) = 1;
|
|
dbScanClasses(dbScanClasses==7) = 2;
|
|
elseif fileIndex == 4
|
|
dbScanClasses(dbScanClasses==1) = 7;
|
|
dbScanClasses(dbScanClasses==3) = 1;
|
|
dbScanClasses(dbScanClasses==2) = 3;
|
|
dbScanClasses(dbScanClasses==7) = 2;
|
|
end
|
|
|
|
figure();
|
|
scatter(features(dbScanClasses == 0, 6), features(dbScanClasses == 0, 7), [], 'k', 'o');
|
|
hold on;
|
|
scatter(features(dbScanClasses == 1, 6), features(dbScanClasses == 1, 7), [], 'b', '*');
|
|
scatter(features(dbScanClasses == 2, 6), features(dbScanClasses == 2, 7), [], 'r', '*');
|
|
scatter(features(dbScanClasses == 3, 6), features(dbScanClasses == 3, 7), [], 'g', '*');
|
|
title(['Dataset #' num2str(fileIndex) ' feature plot after clustering with DB-SCAN']);
|
|
xlabel('PCA feature 1');
|
|
ylabel('PCA feature 2');
|
|
accuracy = classperf(spikeClass',dbScanClasses);
|
|
fprintf('Accuracy achieved with DB-SCAN is %.2f%%\n\n', accuracy.CorrectRate*100);
|
|
|
|
% hierarchical clustering
|
|
distances = pdist(features(:, 6:7));
|
|
linkages = linkage(distances, 'ward');
|
|
hierarchicalClusters = cluster(linkages, 'maxclust', 3);
|
|
% fixes classes enumeration
|
|
if fileIndex == 1
|
|
hierarchicalClusters(hierarchicalClusters==1) = 7;
|
|
hierarchicalClusters(hierarchicalClusters==2) = 1;
|
|
hierarchicalClusters(hierarchicalClusters==7) = 2;
|
|
elseif fileIndex == 3
|
|
hierarchicalClusters(hierarchicalClusters==1) = 7;
|
|
hierarchicalClusters(hierarchicalClusters==2) = 1;
|
|
hierarchicalClusters(hierarchicalClusters==7) = 2;
|
|
end
|
|
|
|
figure();
|
|
scatter(features(hierarchicalClusters == 0, 6), features(hierarchicalClusters == 0, 7), [], 'k', 'o');
|
|
hold on;
|
|
scatter(features(hierarchicalClusters == 2, 6), features(hierarchicalClusters == 2, 7), [], 'r', '*');
|
|
scatter(features(hierarchicalClusters == 3, 6), features(hierarchicalClusters == 3, 7), [], 'g', '*');
|
|
scatter(features(hierarchicalClusters == 1, 6), features(hierarchicalClusters == 1, 7), [], 'b', '*');
|
|
title(['Dataset #' num2str(fileIndex) ' feature plot after clustering with hierarchical clustering']);
|
|
xlabel('PCA feature 1');
|
|
ylabel('PCA feature 2');
|
|
accuracy = classperf(spikeClass',hierarchicalClusters);
|
|
fprintf('Accuracy achieved with hierarchical clustering is %.2f%%\n\n', accuracy.CorrectRate*100);
|
|
|
|
% clustering using kmeans algorithm
|
|
rng(1); % For reproducibility
|
|
kMeansClasses = kmeans(features(:, 6:7), 3);
|
|
% fixes classes enumeration
|
|
if fileIndex == 1
|
|
kMeansClasses(kMeansClasses==2) = 7;
|
|
kMeansClasses(kMeansClasses==3) = 2;
|
|
kMeansClasses(kMeansClasses==7) = 3;
|
|
elseif fileIndex == 2
|
|
kMeansClasses(kMeansClasses==1) = 7;
|
|
kMeansClasses(kMeansClasses==2) = 1;
|
|
kMeansClasses(kMeansClasses==7) = 2;
|
|
elseif fileIndex == 3
|
|
kMeansClasses(kMeansClasses==1) = 7;
|
|
kMeansClasses(kMeansClasses==2) = 1;
|
|
kMeansClasses(kMeansClasses==3) = 2;
|
|
kMeansClasses(kMeansClasses==7) = 3;
|
|
end
|
|
|
|
figure();
|
|
scatter(features(kMeansClasses == 1, 6), features(kMeansClasses == 1, 7), [], 'b', '*');
|
|
hold on;
|
|
scatter(features(kMeansClasses == 2, 6), features(kMeansClasses == 2, 7), [], 'r', '*');
|
|
scatter(features(kMeansClasses == 3, 6), features(kMeansClasses == 3, 7), [], 'g', '*');
|
|
title(['Dataset #' num2str(fileIndex) ' feature plot after clustering with K-Means']);
|
|
xlabel('PCA feature 1');
|
|
ylabel('PCA feature 2');
|
|
accuracy = classperf(spikeClass',kMeansClasses);
|
|
fprintf('Accuracy achieved with K-Means is %.2f%%\n\n', accuracy.CorrectRate*100);
|
|
|
|
clearvars = {'spikesTimesEst', 'spikesEst', 'data', 'features', 'realSpikeIndex', ...
|
|
'correctSpikes', 'spikeClass', 'Data', 'Dataset', 'pcaCoefficients', 'accuracy', 'asud', ...
|
|
'dbScanClasses', 'kMeansClasses'};
|
|
clear(clearvars{:})
|
|
clear clearvars
|
|
end
|
|
|