RMA with Machine Learning

Classification using Flux.jl

Flux.jl is a user-friendly machine learning library in Julia that provides a wide range of tools for building and training neural networks.

In this example, we demonstrate how to use Flux.jl together with RecurrenceMicrostatesAnalysis.jl to train a multilayer perceptron (MLP) that classifies trajectories of a dynamical system by the parameter value that generated them, following the approach presented in (Spezzatto et al., 2024).

Importing required packages

using Flux
using DynamicalSystems
using RecurrenceMicrostatesAnalysis

Generating data

We use the Lorenz system as the data source, which can be simulated using DynamicalSystems.jl:

function lorenz(u, p, t)
    σ, ρ, β = p
    x, y, z = u

    # Out-of-place rule: the derivatives are returned as an SVector
    # (no mutation, so the name carries no `!`).
    return SVector(
        σ * (y - x),
        x * (ρ - z) - y,
        x * y - β * z
    )
end
lorenz (generic function with 1 method)
function lorenz_trajectory(σ, ρ, β; u0 = rand(3), t = 250.0, Ttr = 1200.0, Δt_sample = 0.2)
    p = (σ, ρ, β)
    cds = ContinuousDynamicalSystem(lorenz, u0, p)
    x, _ = trajectory(cds, t; Ttr = Ttr, Δt = Δt_sample)
    return x
end
lorenz_trajectory (generic function with 1 method)

We fix the parameters $\sigma = 10$ and $\beta = 8/3$, and vary $\rho \in \{26.0, 27.0, 28.0, 29.0, 30.0\}$. The goal is to generate time series for each value of $\rho$ and train a classifier to identify which parameter value generated a given trajectory.

Creating training and test datasets

First, we define the classes and the size of the training and test datasets:

ρ_cls = [26.0, 27.0, 28.0, 29.0, 30.0];
num_samples_to_test = 50;
num_samples_to_train = 200;
200
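With five classes, these choices imply 5 × 200 = 1000 training samples and 5 × 50 = 250 test samples. A quick sanity check in plain Julia:

```julia
ρ_cls = [26.0, 27.0, 28.0, 29.0, 30.0]
num_samples_to_test = 50
num_samples_to_train = 200

n_train = length(ρ_cls) * num_samples_to_train   # 1000 training samples
n_test  = length(ρ_cls) * num_samples_to_test    # 250 test samples
```

These totals match the lengths of the vectors preallocated below.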

Next, we preallocate the vectors that will hold the time series and their labels:

train_timeseries = Vector{StateSpaceSet}(undef, length(ρ_cls) * num_samples_to_train)
test_timeseries = Vector{StateSpaceSet}(undef, length(ρ_cls) * num_samples_to_test)

train_labels = Vector{Float64}(undef, length(ρ_cls) * num_samples_to_train)
test_labels = Vector{Float64}(undef, length(ρ_cls) * num_samples_to_test)
250-element Vector{Float64}:
 8.4e-323
 8.4e-323
 6.8986485577801e-310
 6.8986485577817e-310
 9.0e-323
 9.0e-323
 6.89864855778326e-310
 6.89864855778484e-310
 9.4e-323
 1.0e-322
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

The following function generates the data:

function generate_data!(labels, data, classes, num_elements_per_class)
    c = 1
    for i ∈ eachindex(labels)
        labels[i] = classes[c]
        data[i] = lorenz_trajectory(10.0, classes[c], 8/3)

        # Move to the next class after filling its block of samples.
        if (i % num_elements_per_class == 0)
            c += 1
        end
    end
end
generate_data! (generic function with 1 method)
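The labels are assigned block-wise: the first num_elements_per_class entries receive the first class, the next block the second class, and so on. A minimal sketch of just the labeling logic (with the trajectory call omitted, for illustration only) makes the layout explicit:

```julia
# Labeling logic of generate_data! in isolation: classes are assigned
# in contiguous blocks of `num_elements_per_class` samples each.
function fill_labels!(labels, classes, num_elements_per_class)
    c = 1
    for i ∈ eachindex(labels)
        labels[i] = classes[c]
        if i % num_elements_per_class == 0
            c += 1
        end
    end
    return labels
end

labels = fill_labels!(zeros(6), [26.0, 27.0, 28.0], 2)
# labels == [26.0, 26.0, 27.0, 27.0, 28.0, 28.0]
```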
generate_data!(train_labels, train_timeseries, ρ_cls, num_samples_to_train)
train_timeseries
1000-element Vector{StateSpaceSet}:
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 ⋮
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
generate_data!(test_labels, test_timeseries, ρ_cls, num_samples_to_test)
test_timeseries
250-element Vector{StateSpaceSet}:
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 ⋮
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points
 3-dimensional StateSpaceSet{Float64} with 1251 points

Preparing the input features

For each time series, we compute the RMA distribution and store it as a feature vector.

microstate_n = 3
train_dataset = Matrix{Float64}(undef, 2^(microstate_n * microstate_n) + 2, length(train_labels))
test_dataset = Matrix{Float64}(undef, 2^(microstate_n * microstate_n) + 2, length(test_labels))
514×250 Matrix{Float64}:
 0.0           2.0133e-315   …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 1.63826e-317  2.31207e-315     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 6.89871e-310  2.3105e-315      0.0  0.0  0.0  0.0  0.0  0.0  0.0
 2.30987e-315  8.48798e-314     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           2.31207e-315     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           2.122e-314    …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 ⋮                           ⋱            ⋮                   
 2.122e-314    0.0           …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0           …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0           0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 5.0e-324      0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0
 2.0133e-315   0.0              0.0  0.0  0.0  0.0  0.0  0.0  0.0

The following function computes the RMA features:

function get_probs!(dataset, timeseries, n)
    for i ∈ eachindex(timeseries)
        # Optimize the recurrence threshold by maximizing the recurrence entropy.
        th, s = optimize(Threshold(), RecurrenceEntropy(), timeseries[i], n)
        dist = distribution(timeseries[i], th, n)
        dataset[1, i] = th          # optimized threshold
        dataset[2, i] = s           # recurrence entropy
        dataset[3:end, i] = dist    # microstate probability distribution
    end
end
get_probs! (generic function with 1 method)
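For n = 3, each microstate is a 3×3 binary block of the recurrence matrix, so there are 2^(3·3) = 512 possible microstates; together with the threshold and entropy rows this gives the 514 feature rows allocated above. The count is easy to verify:

```julia
microstate_n = 3
n_microstates = 2^(microstate_n * microstate_n)   # 512 possible 3×3 binary patterns
n_features = n_microstates + 2                    # plus the threshold and entropy rows
```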
get_probs!(train_dataset, train_timeseries, microstate_n)
train_dataset
514×1000 Matrix{Float64}:
 17.2997       16.6256       16.5396       …  18.1221       18.1359
  5.16408       5.16866       5.14352          5.29422       5.28125
  0.00035897    0.00325637    0.00508968       0.00194869    0.000833323
  0.0122434     0.0182049     0.0215638        0.0139998     0.012987
  0.000846143   0.00326919    0.00408969       0.00389739    0.00242305
  0.0109999     0.0127434     0.0143973    …   0.0112563     0.0110511
  0.00420507    0.00980757    0.0132691        0.00702555    0.00580762
  0.00665376    0.00644864    0.00557685       0.00691017    0.00773067
  0.0051025     0.00753836    0.00878194       0.00608967    0.00599992
  0.0014487     0.00282048    0.00339739       0.00128203    0.000910245
  ⋮                                        ⋱                
  0.0029615     0.00421789    0.00382046   …   0.0030256     0.00248715
  0.0102435     0.00862809    0.00958962       0.00876912    0.00907681
  0.011564      0.00914091    0.00848707       0.00601274    0.00556403
  0.000128203   0.000320509   0.000256407      0.000410251   0.000141024
  0.0086922     0.00671786    0.00707683       0.00537173    0.00452558
  0.00074358    0.000807682   0.00071794   …   0.000756401   0.00035897
  0.0030256     0.00432046    0.00388457       0.00326919    0.00258971
  0.00156408    0.0027692     0.00201279       0.00216664    0.00171793
  0.0209228     0.0520122     0.0511147        0.047102      0.0321919
get_probs!(test_dataset, test_timeseries, microstate_n)
test_dataset
514×250 Matrix{Float64}:
 16.5825       16.8311       17.3268       …  17.5284       17.7215
  5.18102       5.18068       5.15935          5.2425        5.29067
  0.00217946    0.00134614    0.000333329      0.00752554    0.00255125
  0.01791       0.0183716     0.013628         0.0200382     0.0157818
  0.00239741    0.00265381    0.000987167      0.00549993    0.00416661
  0.0129742     0.0127691     0.010564     …   0.0127947     0.0112563
  0.0101281     0.00841015    0.00464097       0.0106665     0.00771785
  0.00739734    0.00546147    0.00678196       0.00792298    0.00747426
  0.00658966    0.0065512     0.00458968       0.00837169    0.00688453
  0.00266663    0.00279484    0.00162818       0.00173075    0.00111537
  ⋮                                        ⋱                
  0.00312817    0.0028974     0.00271791   …   0.00269227    0.00252561
  0.00884604    0.00867938    0.010564         0.00911527    0.00755119
  0.0093845     0.0103588     0.010987         0.00430764    0.00528198
  0.000256407   0.000153844   0.000166665      5.12814e-5    0.000333329
  0.00733324    0.00776913    0.0103588        0.00492301    0.00392303
  0.000769221   0.000858963   0.000615377  …   0.000397431   0.000769221
  0.0028333     0.00255125    0.00278202       0.00279484    0.00234612
  0.00153844    0.00193587    0.00148716       0.00198715    0.00178203
  0.0300894     0.0297304     0.0219356        0.0552813     0.0391662

Defining the neural network model

model = Chain(
    Dense(2^(microstate_n * microstate_n) + 2 => 512, identity),
    Dense(512 => 256, selu),
    Dense(256 => 64, selu),
    Dense(64 => length(ρ_cls)),
    softmax
)

model = f64(model)
Chain(
  Dense(514 => 512),                    # 263_680 parameters
  Dense(512 => 256, selu),              # 131_328 parameters
  Dense(256 => 64, selu),               # 16_448 parameters
  Dense(64 => 5),                       # 325 parameters
  NNlib.softmax,
)                   # Total: 8 arrays, 411_781 parameters, 3.142 MiB.
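The parameter counts reported by Flux follow from the usual dense-layer formula, out × in weights plus out biases; summing over the four layers reproduces the total shown above:

```julia
# Dense(in => out) holds out*in weights plus out biases.
dense_params(in, out) = out * in + out

total = dense_params(514, 512) +   # 263_680
        dense_params(512, 256) +   # 131_328
        dense_params(256, 64)  +   # 16_448
        dense_params(64, 5)        # 325
# total == 411_781
```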

Training the MLP

First, we encode the labels using one-hot vectors:

train_labels = Flux.onehotbatch(train_labels, ρ_cls)
5×1000 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     1  1  1  1  1  1  1  1  1  1  1  1
test_labels = Flux.onehotbatch(test_labels, ρ_cls)
5×250 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 1  1  1  1  1  1  1  1  1  1  1  1  1  …  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     1  1  1  1  1  1  1  1  1  1  1  1
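Flux.onehotbatch maps each label to a column with a single 1 in the row of its class. A hand-rolled equivalent in plain Julia (for illustration only; the real OneHotMatrix is stored far more compactly) makes the encoding explicit:

```julia
# Minimal one-hot encoding: column j gets a 1 in the row of labels[j]'s class.
function onehot_by_hand(labels, classes)
    out = zeros(Bool, length(classes), length(labels))
    for (j, l) in enumerate(labels)
        out[findfirst(==(l), classes), j] = true
    end
    return out
end

onehot_by_hand([26.0, 28.0, 30.0], [26.0, 27.0, 28.0, 29.0, 30.0])
# 5×3 Bool matrix with ones at (1,1), (3,2), (5,3)
```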

We then define the data loader and optimizer:

loader = Flux.DataLoader((train_dataset, train_labels), batchsize = 32, shuffle = true)
32-element DataLoader(::Tuple{Matrix{Float64}, OneHotArrays.OneHotMatrix{UInt32, Vector{UInt32}}}, shuffle=true, batchsize=32)
  with first element:
  (514×32 Matrix{Float64}, 5×32 OneHotMatrix(::Vector{UInt32}) with eltype Bool,)
opt = Flux.setup(Flux.Adam(0.005), model)
(layers = ((weight = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))), σ = ()), (weight = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))), σ = ()), (weight = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))), σ = ()), (weight = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], [0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam(eta=0.005, beta=(0.9, 0.999), epsilon=1.0e-8), ([0.0, 0.0, 0.0, 0.0, 0.0], 
[0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))), σ = ()), ()),)

The training loop is:

for epc ∈ 1:50
    for (x, y) ∈ loader
        # Compute the loss and its gradient with respect to the model parameters.
        _, grads = Flux.withgradient(model) do m
            y_hat = m(x)
            Flux.crossentropy(y_hat, y)
        end

        # Apply one Adam step using the gradient of this mini-batch.
        Flux.update!(opt, model, grads[1])
    end
end
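The loss minimized here is the categorical cross-entropy: the sum of -y log ŷ over classes, averaged over the batch. A minimal hand-rolled version (plain Julia; Flux.crossentropy additionally clamps ŷ away from zero for numerical safety) behaves the same on well-conditioned inputs:

```julia
# Categorical cross-entropy: sum over classes, mean over the batch columns.
crossentropy_by_hand(ŷ, y) = -sum(y .* log.(ŷ)) / size(ŷ, 2)

ŷ = [0.9 0.2; 0.1 0.8]   # predicted class probabilities (columns sum to 1)
y = [1.0 0.0; 0.0 1.0]   # one-hot targets
loss = crossentropy_by_hand(ŷ, y)
# loss == (-log(0.9) - log(0.8)) / 2
```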

Model evaluation

We compute the classification accuracy as follows:

using LinearAlgebra
function get_quantifiers(predict, trusty, classes)
    # Build a confusion matrix: rows are predicted classes, columns are true classes.
    conf = zeros(Int, length(classes), length(classes))
    sz = size(predict, 2)

    for i in 1:sz
        mx_prd = findmax(predict[:, i])
        mx_trt = findmax(trusty[:, i])

        conf[mx_prd[2], mx_trt[2]] += 1
    end

    # Accuracy: fraction of samples on the diagonal of the confusion matrix.
    return tr(conf) / (sum(conf) + eps())
end
get_quantifiers (generic function with 1 method)
accuracy = get_quantifiers(model(test_dataset), test_labels, ρ_cls)
accuracy * 100
100.0
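The accuracy computed above is simply the trace of the confusion matrix divided by its sum, i.e. the fraction of samples whose predicted class matches the true class. A small standalone check of that identity on a toy 3-class confusion matrix:

```julia
using LinearAlgebra

# Toy confusion matrix: rows are predicted classes, columns are true classes.
conf = [4 1 0;
        0 3 1;
        1 0 5]

acc = tr(conf) / sum(conf)   # correct predictions / all predictions
# acc == 12 / 15 == 0.8
```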