The Three-Curves example
using GeometricClusterAnalysis
using LinearAlgebra
using Plots
using Random
using Statistics
Generate the dataset
Let's generate a set of points sampled along three curves, each curve carrying a different label.
# Sample sizes: number of signal points and number of outliers.
nsignal, nnoise = 500, 200

# Dimension of the data and standard deviation of the additive noise.
dim, sigma = 2, 0.02

# Number of clusters looked for at the end of the pipeline.
nb_clusters = 3

# kPLM settings: number of nearest neighbors and number of ellipsoids.
k, c = 15, 30

# kPLM budget: maximum number of iterations and number of initializations.
iter_max, nstart = 100, 10

# Seeded generator so that the sample below is reproducible from run to run.
rng = MersenneTwister(1234)

# Draw the noisy three-curves sample and display it.
data = noisy_three_curves(rng, nsignal, nnoise, sigma, dim)
plot(data)
Hierarchical clustering
# No-op callback handed to `kplm` as its last argument: it ignores `Σ`, leaves it
# unmodified and returns `nothing`.
# NOTE(review): presumably `kplm` uses this hook to constrain its matrices in
# place — confirm against the package documentation.
function f_Σ!(Σ)
    return nothing
end
# Run kPLM on the sample points with the settings chosen above
# (k neighbors, c ellipsoids, nsignal signal points, iter_max, nstart).
df = kplm(rng, data.points, k, c, nsignal, iter_max, nstart, f_Σ!)

# Build the distance matrix from the kPLM result and run a first hierarchical
# clustering with the default keyword settings.
mh = build_distance_matrix(df)
hc1 = hierarchical_clustering_lem(mh)

# Common axis limits for every diagram below: from the smallest birth/death value
# up to the largest birth or non-`Inf` death value, plus a margin of 1.
lims = let births = hc1.birth, deaths = hc1.death
    lower = min(minimum(births), minimum(deaths))
    upper = max(maximum(births), maximum(deaths[deaths .!= Inf])) + 1
    (lower, upper)
end
plot(hc1; xlims = lims, ylims = lims)
# Offset, counted back from the end of `hc1.birth`, at which the threshold is placed.
# NOTE(review): presumably the number of centers to discard — confirm.
nb_means_removed = 2

# Threshold: mean of the two consecutive birth values located at positions
# `end - nb_means_removed` and `end - nb_means_removed + 1`.
threshold = let births = hc1.birth, cut = lastindex(births) - nb_means_removed
    mean((births[cut], births[cut + 1]))
end

# Second clustering pass with that threshold and no finite `infinity` bound yet.
hc2 = hierarchical_clustering_lem(mh; infinity = Inf, threshold = threshold)
plot(hc2; xlims = lims, ylims = lims)
# Values returned by `birth_death` for the second pass, sorted in place
# in increasing order.
bd = sort!(birth_death(hc2))

# `infinity`: mean of the two consecutive sorted values at positions
# `end - nb_clusters` and `end - nb_clusters + 1`.
infinity = let cut = lastindex(bd) - nb_clusters
    mean((bd[cut], bd[cut + 1]))
end

# Final clustering pass using both the threshold and the `infinity` bound.
hc3 = hierarchical_clustering_lem(mh; infinity = infinity, threshold = threshold)
plot(hc3; xlims = lims, ylims = lims)
# Colors (labels) for the sample points, computed from the kPLM result `df`
# and the final clustering `hc3`.
color_final = color_points_from_centers(data.points, k, nsignal, df, hc3)

# Indices stored by the final clustering in its `startup_indices` field.
remain_indices = hc3.startup_indices

# Plot the points together with the ellipsoids; `color_final` is passed for both
# color arguments.
# NOTE(review): the meaning of the trailing `0` is not visible here — confirm
# against the `ellipsoids` documentation.
ellipsoids(
    data.points,
    remain_indices,
    color_final,
    color_final,
    df,
    0,
)