
Tour of sensitivity_torch

A simple, exhaustive example of model tuning using sensitivity analysis
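
sensitivity_torch tunes hyperparameters by differentiating through the solution of an inner optimization problem. A sketch of the idea: if z(θ) denotes the fitted model (here, the logistic-regression weights) for hyperparameters θ (here, the feature-map parameters P and q), and k(z, θ) = 0 are the optimality conditions of the inner problem, then, assuming k is smooth and ∂k/∂z is invertible at the solution, the implicit function theorem gives

$$ \frac{dz}{d\theta} = -\Big(\frac{\partial k}{\partial z}\Big)^{-1} \frac{\partial k}{\partial \theta} $$

which is the Jacobian computed by implicit_jacobian below. This quantity is what lets gradient-based tuning of P proceed even though the inner fit goes through sklearn and is not itself differentiable.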

import torch, numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_moons

from sensitivity_torch.sensitivity import (
    implicit_jacobian,
    generate_optimization_fns,
)
from sensitivity_torch.extras.optimization import (
    minimize_agd,
    minimize_lbfgs,
    minimize_sqp,
)


def feature_map(X, P, q):
    """Fourier feature map Phi = cos(X @ P + q)."""
    Phi = torch.cos(X @ P + q)
    return Phi


def optimize_fn(P, q, X, Y):
    """Fit a logistic regression on the features with sklearn.

    The sklearn call is not differentiable; sensitivities are instead recovered
    through the optimality conditions in k_fn.
    """
    Phi = feature_map(X, P, q)
    model = LogisticRegression(max_iter=300, C=1e-2)
    model.fit(Phi, Y)
    z = np.concatenate([model.coef_.reshape(-1), model.intercept_])
    return torch.tensor(z)


def loss_fn(z, P, q, X, Y):
    """Regularized logistic loss whose minimizer matches the sklearn fit.

    With C=1e-2, LogisticRegression minimizes the sum of log-losses plus
    0.5 * ||W||^2 / C (intercept unregularized), so the gradient of this
    function is zero at the parameters returned by optimize_fn; dividing by the
    number of samples does not move that zero.
    """
    Phi = feature_map(X, P, q)
    W, b = z[:-1], z[-1]
    prob = torch.sigmoid(Phi @ W + b)
    loss = torch.sum(-(Y * torch.log(prob) + (1 - Y) * torch.log(1 - prob)))
    regularization = 0.5 * torch.sum(W ** 2) / 1e-2  # 1 / C
    return (loss + regularization) / X.shape[-2]


def k_fn(z, P, q, X, Y):
    """Optimality conditions of the model: the gradient of loss_fn with respect
    to z, which vanishes at the fixed point of the inner optimization."""
    return torch.autograd.functional.jacobian(
        lambda z: loss_fn(z, P, q, X, Y), z, create_graph=True
    )
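

# An equivalent formulation, shown only as an illustrative sketch (it is not used
# below): loss_fn is scalar-valued, so its Jacobian with respect to z is simply
# the gradient, and the optimality conditions can also be written with
# torch.autograd.grad. The name k_fn_via_grad is hypothetical.
def k_fn_via_grad(z, P, q, X, Y):
    z = z.detach().requires_grad_(True)
    (grad_z,) = torch.autograd.grad(loss_fn(z, P, q, X, Y), z, create_graph=True)
    return grad_z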


if __name__ == "__main__":
    ######################## SETUP ####################################
    # generate data and split into train and test sets
    X, Y = make_moons(n_samples=1000, noise=1e-1)
    X, Y = torch.tensor(X), torch.tensor(Y)
    n_train = 200
    Xtr, Ytr = X[:n_train, :], Y[:n_train]
    Xts, Yts = X[n_train:, :], Y[n_train:]

    # generate the Fourier feature map parameters Phi = cos(X @ P + q)
    n_features = 200
    P = torch.tensor(np.random.randn(2, n_features))
    q = torch.tensor(np.zeros(n_features))

    # check that the fixed-point is numerically close to zero
    z = optimize_fn(P, q, X, Y)
    k = k_fn(z, P, q, X, Y)
    assert torch.max(torch.abs(k)) < 1e-5

    ######################## SENSITIVITY ##############################
    # generate sensitivity Jacobians and check their shapes
    JP, Jq = implicit_jacobian(lambda z, P, q: k_fn(z, P, q, X, Y), z, P, q)
    assert JP.shape == z.shape + P.shape
    assert Jq.shape == z.shape + q.shape
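
    # What the Jacobian means, as a rough illustrative check (assuming JP is laid
    # out with the z dimensions first, as the shape assertion above suggests): for
    # a small perturbation dP, the refit parameters should satisfy
    # z(P + dP) ≈ z + JP @ vec(dP) to first order. The refit goes through
    # sklearn's own solver, so only approximate agreement is expected.
    dP = 1e-4 * torch.randn_like(P)
    z_pred = z + JP.reshape(z.numel(), -1) @ dP.reshape(-1)
    z_refit = optimize_fn(P + dP, q, X, Y)
    print("first-order prediction error: %9.4e" % torch.norm(z_refit - z_pred))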

    ######################## OPTIMIZATION #############################
    # generate necessary functions for optimization
    opt_fn_ = lambda P: optimize_fn(P, q, Xtr, Ytr)  # model optimization
    k_fn_ = lambda z, P: k_fn(z, P, q, Xtr, Ytr)  # fixed-point
    loss_fn_ = lambda z, P: loss_fn(z, P, q, Xts, Yts)  # loss to improve
    f_fn, g_fn, h_fn = generate_optimization_fns(loss_fn_, opt_fn_, k_fn_)
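    # f_fn, g_fn and h_fn evaluate the outer (test) loss, its gradient and its
    # Hessian as functions of P alone, with the dependence of the fitted model on
    # P supplied implicitly through the fixed-point map k_fn_ (this is how the
    # three functions are consumed by the minimizers below)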

    # choose any of the optimization routines: AGD and L-BFGS use only the value
    # and gradient (f_fn, g_fn), while SQP also uses the Hessian (h_fn)
    # Ps = minimize_agd(f_fn, g_fn, P, verbose=True, ai=1e-1, af=1e-1, max_it=100)
    # Ps = minimize_lbfgs(f_fn, g_fn, P, verbose=True, lr=1e-1, max_it=10)
    Ps = minimize_sqp(f_fn, g_fn, h_fn, P, verbose=True, max_it=10)

    def predict(z, P, q, X):
        """Predict 0/1 class labels from the fitted model parameters z."""
        Phi = feature_map(X, P, q)
        W, b = z[:-1], z[-1]
        prob = torch.sigmoid(Phi @ W + b)
        return torch.round(prob)

    # evaluate the results
    acc0 = torch.mean(1.0 * (predict(opt_fn_(P), P, q, Xts) == Yts))
    accf = torch.mean(1.0 * (predict(opt_fn_(Ps), Ps, q, Xts) == Yts))
    print("Accuracy before: %4.2f%%" % (1e2 * acc0))
    print("Accuracy after:  %4.2f%%" % (1e2 * accf))
    print("Loss before:     %9.4e" % loss_fn(opt_fn_(P), P, q, Xts, Yts))
    print("Loss after:      %9.4e" % loss_fn(opt_fn_(Ps), Ps, q, Xts, Yts))