The Three-Curves example
using GeometricClusterAnalysis
using LinearAlgebra
using Plots
using Random
using StatisticsGenerate the dataset
Let's generate a set of points that draws three curves with a different label.
nsignal = 500    # number of signal points
nnoise = 200     # number of outliers
dim = 2          # dimension of the data
sigma = 0.02     # standard deviation for the additive noise
nb_clusters = 3  # number of clusters
k = 15           # number of nearest neighbors
c = 30           # number of ellipsoids
iter_max = 100   # maximum number of iterations of the algorithm kPLM
nstart = 10      # number of initializations of the algorithm kPLM
rng = MersenneTwister(1234)
data = noisy_three_curves(rng, nsignal, nnoise, sigma, dim)
plot(data)Hierarchical clustering
function f_Σ!(Σ) end
df = kplm(rng, data.points, k, c, nsignal, iter_max, nstart, f_Σ!)
mh = build_distance_matrix(df)
hc1 = hierarchical_clustering_lem(mh)
lims = (min(minimum(hc1.birth), minimum(hc1.death)),
        max(maximum(hc1.birth), maximum(hc1.death[hc1.death .!= Inf]))+1)
plot(hc1, xlims = lims, ylims = lims)nb_means_removed = 2
threshold = mean((hc1.birth[end - nb_means_removed],hc1.birth[end - nb_means_removed + 1]))
hc2 = hierarchical_clustering_lem(mh, infinity = Inf, threshold = threshold)
plot(hc2, xlims = lims, ylims = lims)bd = birth_death(hc2)
sort!(bd)
infinity = mean((bd[end - nb_clusters],bd[end - nb_clusters + 1]))
hc3 = hierarchical_clustering_lem(mh; infinity = infinity, threshold = threshold)
plot(hc3, xlims = lims, ylims = lims)color_final = color_points_from_centers( data.points, k, nsignal, df, hc3)
remain_indices = hc3.startup_indices
ellipsoids(data.points, remain_indices, color_final, color_final, df, 0 )