Spiral Problem¶

This document presents a fictitious problem of learning the length of a spiral;

The equation of the features $x_1$ and $x_2$ and target is given by: \begin{eqnarray} x_1 &=& \theta \cos(\theta) + \epsilon_1 ~~~~~~ x_2 = \theta \sin(\theta) + \epsilon_2 \\ y &=& \frac{1}{2}\left[ \theta \sqrt{1+\theta^2}+ \sinh^{-1} \theta \right] \nonumber \label{eq:spiral} \end{eqnarray} where $x = (x_1, x_2)$ is a point in the Cartesian plane defined by the spiral representing the features $x_1$ and $x_2$, $\theta$ is an independent variable, $\epsilon_i$ ($i \in {1, 2}$) is random noise, and the target value is given by $y$, which is the length of the spiral calculated at a point $x$. This toy model presents some interesting features for our analysis, such as the feature domain over the spiral and the substantial variance of the target value when varying one of the features' coordinates while keeping the other one fixed.

This notebook compares the LIME with MeLIME results. You will need to have LIME installed with you would like to obtain LIME explanations, you can do that by:

pip install lime

import sys, os
sys.path.append('..')

import pickle
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import HuberRegressor as Linear

# Data - Toy Model Length Spiral
from utils.domain import Spiral

from melime.explainers.explainer import Explainer
from melime.generators.kde_gen import KDEGen
from melime.explainers.visualizations.plot_importance import ExplainGraph

import lime.lime_tabular # Comment this line if you do not want to run lime.

Data spiral¶

data = Spiral(n_samples=10000, theta_domain=[2*np.pi, 2.5*np.pi], error_x=0.1)

x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, train_size=0.80)

Training a MLP Model¶

model = make_pipeline(
    StandardScaler(),
    MLPRegressor(
        solver='lbfgs',random_state=1, max_iter=10000, early_stopping=True,
        alpha=0.01,
        hidden_layer_sizes=[50, 50, 50, 50, 50]
))

filename = 'mlp_model_spiral.bin'
if os.path.exists(filename):
    model = pickle.load(open(filename, 'rb'))
else:
    model.fit(x_train, y_train)
    pickle.dump(model, open(filename, 'wb'))

print('R2: ', metrics.r2_score(y_test, model.predict(x_test)))
print('MSE: ', metrics.mean_squared_error(y_test, model.predict(x_test)))

R2:  0.9999806545211491
MSE:  0.010402563142367321

Explanation¶

categorical_features = np.argwhere(np.array([len(set(data.data[:,x])) for x in range(data.data.shape[1])]) <= 10).flatten()
categorical_features

array([], dtype=int64)

Explanation for instance x¹ = (0.0, 8.5):¶

For point $x^*$, the target value (the length of the spiral) will locally depend on the value of $x_1$. Thus the explanation methods should indicate that variable as the most important.

x_explain = np.array([[0.0, 8.0]])

y_p = model.predict(x_train)
fig, ax = plt.subplots()
fig.set_size_inches(8,7)
cp1 = ax.scatter(x_train[:, 0], x_train[:, 1], s=50, c=y_p)
cp2 = ax.scatter(x_explain[:, 0], x_explain[:, 1], s=100, c='red', marker='*')
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(cp1, cax=cax)
cax.set_ylabel('Predicted Length Spiral', fontsize=18)
cax.tick_params(labelsize=14)
fig.set_size_inches(7,7)
ax.set_xlabel('$x_1$', fontsize=20)
ax.set_ylabel('$x_2$', fontsize=20)
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)
print(f'x_explain: {x_explain}')
print(f'Predict Lenght: {model.predict(np.array([[0.0, 8.0]]))[0]:5.4f}')

Lime Explanation¶

explainer = lime.lime_tabular.LimeTabularExplainer(
    x_train, feature_names=data.feature_names,
    class_names=['legth'], categorical_features=categorical_features,
    verbose=True, mode='regression', discretize_continuous=False)

exp = explainer.explain_instance(x_explain[0], model.predict)

Intercept 43.28116702604447
Prediction_local [53.7279711]
Right: 32.55003354460349

exp.domain_mapper.feature_names,

(['x1', 'x2'],)

exp.domain_mapper.feature_values,

(['0.00', '8.00'],)

# Dictionary for plot the explanation
dict_imp = {e[0]:e[1] for e in exp.as_list()}
importances = [dict_imp[e] for e in exp.domain_mapper.feature_names]
explanation = dict(
    chi_names=exp.domain_mapper.feature_names,
    chi_values=np.array(x_explain),
    x_names=exp.domain_mapper.feature_names,
    x_values=exp.domain_mapper.feature_values,
    y_p=model.predict(np.array([[0.0, 8.0]]))[0],
    y_p_max=exp.max_value,
    y_p_min=exp.min_value,
    y_p_local_model=exp.local_pred[0],
    y_p_local_model_max=None,
    y_p_local_model_min=None,
    error=None,
    importances=importances,
    diff_convergence_importances=None,
    ind_class_sorted=0,
    class_names= ["lenght"]
)

fig, axs = ExplainGraph.plot(explanation)
plt.savefig(f'spiral_x_{x_explain}_lime.svg', dpi=300)

The local prediction does not agree with the ML model predictions (difference of $\approx 20$), which decreases the trust of the produced explanation. Despite that, the produced explanation is not correct. The feature importance is positive for both features $x_1$ and $x_2$. While a decrease in the $x_1$ should locally increase the spiral length.

# exp.show_in_notebook(show_table=True)

MeLIME using KDEGen¶

Now, we use Kernel Density Estimation (KDE) to obtain an estimative of the manifold of the feature space. Then, the interpretation is produced from samples drawn from this estimative close from the instance x¹

generator = KDEGen(verbose=True).fit(x_train)

Best Parameter for the KDE:
{'bandwidth': 0.08315789473684211}
Score: -5712.604909985743

x_set = generator.sample_radius(x_explain, r=1.0, n_samples=500)

Visualization of the sampled data:¶

y_p = model.predict(x_train)
fig, ax = plt.subplots()

cp1 = ax.scatter(x_train[:, 0], x_train[:, 1], s=50, c=y_p)
cp = ax.scatter(x_set[:, 0], x_set[:, 1], s=20, c='tab:gray', alpha=1.0, label='$samples$')
cp2 = ax.scatter(x_explain[:, 0], x_explain[:, 1], s=100, c='red', marker='*', label='$x^*$')
divider = make_axes_locatable(ax)
leg = ax.legend(fontsize=15)
ax.tick_params(direction='in', length=6, right=True, top=True)
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(cp1, cax=cax)
cax.set_ylabel('Predicted Length Spiral', fontsize=18)
cax.tick_params(labelsize=14)
fig.set_size_inches(7,4)
ax.set_xlabel('$x_1$', fontsize=20)
ax.set_ylabel('$x_2$', fontsize=20, labelpad=-15)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
plt.tight_layout()
fig.set_size_inches(7,7)
plt.savefig('spiral_figure_spiral_legth.svg', dpi=300)

Using a HuberRegressor as local model¶

explainer = Explainer(
    model_predict=model.predict,
    generator=generator, 
    local_model='HuberRegressor', 
    feature_names=data.feature_names,
    target_names=["lenght"]
)
explanation, counterfactual_examples = explainer.explain_instance(
    x_explain=x_explain.reshape(1, -1),
    r=1.0,
    n_samples=1000,
    tol_importance=0.01,
    local_mini_batch_max=20,
    scale_data=False,
    weight_kernel='gaussian'
)

fig, axs = ExplainGraph.plot(explanation.explain())
plt.savefig(f'spiral_x_{x_explain}_m-lime_kde_HR.svg', dpi=300)
fig, axs = ExplainGraph.plot_errors(explanation)

Using a Ridge Regression as local model¶

explainer = Explainer(
    model_predict=model.predict,
    generator=generator, 
    local_model='Ridge', 
    feature_names=data.feature_names,
    target_names=["lenght"]
)
explanation, counterfactual_examples = explainer.explain_instance(
    x_explain=x_explain.reshape(1, -1),
    r=1.0,
    n_samples=1000,
    tol_importance=0.01,
    local_mini_batch_max=20,
    scale_data=False,
    weight_kernel='gaussian'
)

fig, axs = ExplainGraph.plot(explanation.explain())
plt.savefig(f'spiral_x_{x_explain}_m-lime_kde_R.svg', dpi=300)
fig, axs = ExplainGraph.plot_errors(explanation)

Thank you!