Apostolos Fanakis
6 years ago
2 changed files with 211 additions and 13 deletions
@ -0,0 +1,138 @@ |
|||
% DBSCAN  DBSCAN clustering algorithm
%
% Usage:  [C, ptsC, centres] = dbscan(P, E, minPts)
%
% Arguments:
%         P      - dim x Npts array of points (one point per column).
%         E      - Distance threshold. Two points are neighbours when their
%                  Euclidean distance is strictly less than E.
%         minPts - Minimum number of points required to form a cluster.
%
% Returns:
%         C       - Cell array of length Nc listing indices of points associated
%                   with each cluster.
%         ptsC    - Array of length Npts listing the cluster number associated
%                   with each point. If a point is denoted as noise (not enough
%                   nearby elements to form a cluster) its cluster number is 0.
%         centres - dim x Nc array of the average centre of each cluster.
|||
|
|||
% Reference: |
|||
% Martin Ester, Hans-Peter Kriegel, Jörg Sander, Xiaowei Xu (1996). "A |
|||
% density-based algorithm for discovering clusters in large spatial databases |
|||
% with noise". Proceedings of the Second International Conference on Knowledge |
|||
% Discovery and Data Mining (KDD-96). AAAI Press. pp. 226-231. |
|||
% Also see: http://en.wikipedia.org/wiki/DBSCAN |
|||
|
|||
% Copyright (c) 2013 Peter Kovesi |
|||
% Centre for Exploration Targeting |
|||
% The University of Western Australia |
|||
% peter.kovesi at uwa edu au |
|||
% |
|||
% Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
% of this software and associated documentation files (the "Software"), to deal |
|||
% in the Software without restriction, subject to the following conditions: |
|||
% |
|||
% The above copyright notice and this permission notice shall be included in |
|||
% all copies or substantial portions of the Software. |
|||
% |
|||
% The Software is provided "as is", without warranty of any kind. |
|||
|
|||
% PK January 2013 |
|||
|
|||
function [C, ptsC, centres] = dbscan(P, E, minPts)
% DBSCAN  Density-based clustering of the columns of P.
%
% Usage:  [C, ptsC, centres] = dbscan(P, E, minPts)
%
% Arguments:
%         P      - dim x Npts array of points, one point per column.
%         E      - Distance threshold handed to regionQuery.
%         minPts - Minimum number of points, the point itself included, that
%                  must lie in a point's E-neighbourhood for that point to
%                  start a cluster or to extend one.
%
% Returns:
%         C       - Cell array; C{k} lists the indices of the points in cluster
%                   k, in the order they were added.
%         ptsC    - Npts x 1 array giving the cluster number of every point;
%                   0 marks noise.
%         centres - dim x Nc array holding the mean of the points of each
%                   cluster.

    [dim, Npts] = size(P);

    ptsC    = zeros(Npts, 1);   % Cluster number per point, 0 = noise/unassigned.
    C       = {};
    Nc      = 0;                % Cluster counter.
    visited = false(1, Npts);   % Points whose neighbourhood has been queried.
    queued  = false(1, Npts);   % Points already placed on some work list.

    % regionQuery never returns the query point itself, so a point whose
    % neighbourhood (itself included) holds minPts points has minPts-1
    % neighbours.  The same threshold is used both for seeding a cluster and
    % for expanding it; the expansion test previously demanded one neighbour
    % more than the seed test, so some core points were never expanded.
    minNeighbours = minPts - 1;

    for n = 1:Npts
        if visited(n)
            continue
        end
        visited(n) = true;
        neighbourPts = regionQuery(P, n, E);

        if length(neighbourPts) < minNeighbours
            % Not enough points to form a cluster: n stays noise (ptsC(n) is
            % already 0).  It can still be claimed later as a member of a
            % cluster whose expansion reaches it.
            continue
        end

        % Start a new cluster seeded by point n.
        Nc      = Nc + 1;
        C{Nc}   = n;
        ptsC(n) = Nc;

        % Everything on the work list is flagged so that it is never appended
        % a second time; processing a duplicate entry would be a no-op anyway.
        queued(n)            = true;
        queued(neighbourPts) = true;

        ind = 1;                % Index into the growing work list.
        while ind <= length(neighbourPts)
            nb = neighbourPts(ind);

            if ~visited(nb)
                visited(nb) = true;

                % If nb is itself a core point, its neighbours join the
                % work list of this cluster.
                neighbourPtsP = regionQuery(P, nb, E);
                if length(neighbourPtsP) >= minNeighbours
                    fresh         = neighbourPtsP(~queued(neighbourPtsP));
                    queued(fresh) = true;
                    neighbourPts  = [neighbourPts fresh];
                end
            end

            % A point not yet owned by any cluster (unassigned or previously
            % marked as noise) becomes a member of this one.
            if ~ptsC(nb)
                C{Nc}    = [C{Nc} nb];
                ptsC(nb) = Nc;
            end

            ind = ind + 1;
        end
    end

    % Centre of each cluster = mean of its member points.
    centres = zeros(dim, length(C));
    for k = 1:length(C)
        centres(:, k) = mean(P(:, C{k}), 2);
    end

end % of dbscan
|||
|
|||
%------------------------------------------------------------------------
% Find indices of all points within distance E of the point with index n.
% This function could make use of a precomputed distance table to avoid
% repeated distance calculations; however, this would require N^2 storage.
% Not a big problem either way if the number of points being clustered is
% small. For large datasets this function will need to be optimised.
%
% Arguments:
%         P - the dim x Npts array of data points
%         n - Index of point of interest
%         E - Distance threshold
|||
|
|||
function neighbours = regionQuery(P, n, E)
% REGIONQUERY  Indices of the columns of P lying closer than E to column n.
%
% The query point n itself is never reported.  A column i is reported when
% its squared Euclidean distance to column n is strictly less than E^2.
% The result is a row vector of indices in ascending order, or [] when no
% column qualifies.

    radiusSq = E^2;
    numPts   = size(P, 2);
    isNear   = false(1, numPts);    % isNear(i) is set when column i qualifies

    for idx = 1:numPts
        if idx == n
            continue                % skip the query point itself
        end

        % Compare squared distances so that no square root is needed.
        offset      = P(:, idx) - P(:, n);
        isNear(idx) = (offset' * offset) < radiusSq;
    end

    neighbours = find(isNear);
    if isempty(neighbours)
        neighbours = [];            % keep the 0x0 empty result
    end

end % of regionQuery
|||
|
Loading…
Reference in new issue