This document presents a fictitious (toy) problem: learning the length of a spiral.
The features $x_1$ and $x_2$ and the target $y$ are given by: \begin{eqnarray} x_1 &=& \theta \cos(\theta) + \epsilon_1 ~~~~~~ x_2 = \theta \sin(\theta) + \epsilon_2 \\ y &=& \frac{1}{2}\left[ \theta \sqrt{1+\theta^2}+ \sinh^{-1} \theta \right] \nonumber \label{eq:spiral} \end{eqnarray} where $x = (x_1, x_2)$ is a point in the Cartesian plane defined by the spiral, whose coordinates are the features $x_1$ and $x_2$; $\theta$ is an independent variable; $\epsilon_i$ ($i \in \{1, 2\}$) is random noise; and the target value $y$ is the length of the spiral calculated at the point $x$. This toy model presents some interesting features for our analysis, such as the feature domain being restricted to the spiral and the substantial variance of the target value when varying one of the feature coordinates while keeping the other one fixed.
This notebook compares LIME explanations with MeLIME explanations. You need to have LIME installed if you would like to obtain the LIME explanations; you can install it with:
pip install lime
import sys, os
sys.path.append('..')
import pickle
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import HuberRegressor as Linear
# Data - Toy Model Length Spiral
from utils.domain import Spiral
from melime.explainers.explainer import Explainer
from melime.generators.kde_gen import KDEGen
from melime.explainers.visualizations.plot_importance import ExplainGraph
import lime.lime_tabular # Comment this line if you do not want to run lime.
# Draw 10,000 samples of the spiral toy problem with theta restricted to
# [2*pi, 2.5*pi]; `error_x` is presumably the scale of the noise added to the
# features (confirm against utils.domain.Spiral).
data = Spiral(
    n_samples=10000,
    theta_domain=[2 * np.pi, 2.5 * np.pi],
    error_x=0.1,
)

# Use 80% of the samples for training and hold the rest out for evaluation.
# NOTE: no random_state is passed, so the split differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    train_size=0.80,
)
# Model to be explained: standardised features fed to an MLP regressor with
# five hidden layers of 50 units each, trained with L-BFGS.
# NOTE(review): the scikit-learn docs state that `early_stopping` is only
# effective for the 'sgd' and 'adam' solvers, so it presumably has no effect
# with solver='lbfgs' -- confirm before relying on it.
model = make_pipeline(
    StandardScaler(),
    MLPRegressor(
        hidden_layer_sizes=[50] * 5,
        solver='lbfgs',
        alpha=0.01,
        max_iter=10000,
        early_stopping=True,
        random_state=1,
    ),
)
# Cache the fitted pipeline on disk so that re-running the notebook does not
# retrain the network.
# NOTE: the train/test split is not seeded, so a cached model may have been
# fitted on a different split than the current one; delete the file to force
# retraining.
# NOTE(security): unpickling executes arbitrary code -- only load a cache file
# that this notebook itself produced.
filename = 'mlp_model_spiral.bin'
if os.path.exists(filename):
    # Context managers guarantee the file handle is closed after use.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)
else:
    model.fit(x_train, y_train)
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)
# Score the model on the held-out split; predict once and reuse the result
# for both metrics.
y_test_pred = model.predict(x_test)
print('R2: ', metrics.r2_score(y_test, y_test_pred))
print('MSE: ', metrics.mean_squared_error(y_test, y_test_pred))
# Treat a feature column as categorical when it holds at most 10 distinct
# values; the result is the array of indices of such columns.
n_features = data.data.shape[1]
distinct_counts = np.array(
    [len(set(data.data[:, col])) for col in range(n_features)]
)
categorical_features = np.flatnonzero(distinct_counts <= 10)
categorical_features
For the point $x^* = (0, 8)$, the target value (the length of the spiral) will locally depend on the value of $x_1$. Thus, the explanation methods should indicate $x_1$ as the most important feature.
# Instance to be explained, x* = (x_1, x_2) = (0, 8), stored as a (1, 2) array.
x_explain = np.array([[0.0, 8.0]])

# Model predictions on the training set, used to colour the scatter plot.
y_p = model.predict(x_train)

fig, ax = plt.subplots()
cp1 = ax.scatter(x_train[:, 0], x_train[:, 1], s=50, c=y_p)
# Mark the instance to be explained with a red star.
cp2 = ax.scatter(x_explain[:, 0], x_explain[:, 1], s=100, c='red', marker='*')

# Put the colour bar in its own axes to the right of the main plot.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(cp1, cax=cax)
cax.set_ylabel('Predicted Length Spiral', fontsize=18)
cax.tick_params(labelsize=14)

# Single size call: an earlier 8x7 setting was always overridden by this one.
fig.set_size_inches(7, 7)
ax.set_xlabel('$x_1$', fontsize=20)
ax.set_ylabel('$x_2$', fontsize=20)
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

print(f'x_explain: {x_explain}')
# Predict at x_explain itself instead of a hard-coded copy of its coordinates,
# so the printed value always matches the instance shown above.
print(f'Predict Lenght: {model.predict(x_explain)[0]:5.4f}')
# LIME tabular explainer in regression mode, built on the training features;
# continuous features are left undiscretised.
explainer = lime.lime_tabular.LimeTabularExplainer(
    x_train,
    mode='regression',
    feature_names=data.feature_names,
    class_names=['legth'],
    categorical_features=categorical_features,
    discretize_continuous=False,
    verbose=True,
)

# Explain the model's prediction at the single instance x_explain[0].
exp = explainer.explain_instance(x_explain[0], model.predict)

# Feature names and values held by the explanation's domain mapper
# (bare expressions, kept for notebook display).
exp.domain_mapper.feature_names,
exp.domain_mapper.feature_values,
# Build the dictionary that ExplainGraph.plot expects from the LIME result.
# exp.as_list() yields (feature name, weight) pairs; index them by name so the
# importances can be listed in the domain mapper's feature order.
dict_imp = dict(exp.as_list())
importances = [dict_imp[e] for e in exp.domain_mapper.feature_names]
explanation = dict(
    chi_names=exp.domain_mapper.feature_names,
    chi_values=np.array(x_explain),
    x_names=exp.domain_mapper.feature_names,
    x_values=exp.domain_mapper.feature_values,
    # Predict at x_explain itself rather than a hard-coded copy of its
    # coordinates, so the plot stays consistent if the instance changes.
    y_p=model.predict(x_explain)[0],
    y_p_max=exp.max_value,
    y_p_min=exp.min_value,
    y_p_local_model=exp.local_pred[0],
    # Quantities not filled in for the LIME explanation.
    y_p_local_model_max=None,
    y_p_local_model_min=None,
    error=None,
    importances=importances,
    diff_convergence_importances=None,
    ind_class_sorted=0,
    class_names=["lenght"],
)
fig, axs = ExplainGraph.plot(explanation)
# NOTE: the file name embeds the array repr of x_explain (brackets and spaces).
plt.savefig(f'spiral_x_{x_explain}_lime.svg', dpi=300)
The local prediction does not agree with the ML model prediction (difference of $\approx 20$), which decreases trust in the produced explanation. Moreover, the produced explanation is not correct: the feature importance is positive for both features $x_1$ and $x_2$, whereas a decrease in $x_1$ should locally increase the spiral length.
# exp.show_in_notebook(show_table=True)
Now, we use Kernel Density Estimation (KDE) to obtain an estimate of the manifold of the feature space. Then, the interpretation is produced from samples drawn from this estimate close to the instance $x^*$.
# Fit the KDE-based generator on the training features, then draw 500 samples
# around the instance to be explained (radius r=1.0).
generator = KDEGen(verbose=True).fit(x_train)
x_set = generator.sample_radius(x_explain, r=1.0, n_samples=500)

# Model predictions on the training set, used to colour the background points.
y_p = model.predict(x_train)

fig, ax = plt.subplots()

# Drawn in this order: training points, generated samples, then the instance.
cp1 = ax.scatter(x_train[:, 0], x_train[:, 1], s=50, c=y_p)
cp = ax.scatter(
    x_set[:, 0], x_set[:, 1], s=20, c='tab:gray', alpha=1.0, label='$samples$'
)
cp2 = ax.scatter(
    x_explain[:, 0], x_explain[:, 1], s=100, c='red', marker='*', label='$x^*$'
)
leg = ax.legend(fontsize=15)

# Colour bar in its own axes to the right of the main plot.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(cp1, cax=cax)
cax.set_ylabel('Predicted Length Spiral', fontsize=18)
cax.tick_params(labelsize=14)

# Axis labels and tick styling of the main axes.
ax.set_xlabel('$x_1$', fontsize=20)
ax.set_ylabel('$x_2$', fontsize=20, labelpad=-15)
ax.tick_params(direction='in', length=6, right=True, top=True)
ax.tick_params(axis='both', labelsize=14)

# Order matters here: tight_layout is evaluated while the figure is 7x4 and
# the figure is resized to 7x7 only afterwards.
fig.set_size_inches(7, 4)
plt.tight_layout()
fig.set_size_inches(7, 7)
plt.savefig('spiral_figure_spiral_legth.svg', dpi=300)
# MeLIME explanation using the KDE generator and a 'HuberRegressor' local model.
explainer = Explainer(
    model_predict=model.predict,
    generator=generator,
    local_model='HuberRegressor',
    feature_names=data.feature_names,
    target_names=["lenght"],
)

# Settings for explaining the instance x_explain.
explain_options = dict(
    x_explain=x_explain.reshape(1, -1),
    r=1.0,
    n_samples=1000,
    tol_importance=0.01,
    local_mini_batch_max=20,
    scale_data=False,
    weight_kernel='gaussian',
)
explanation, counterfactual_examples = explainer.explain_instance(**explain_options)

# Plot and save the explanation, then plot the errors reported for it.
fig, axs = ExplainGraph.plot(explanation.explain())
plt.savefig(f'spiral_x_{x_explain}_m-lime_kde_HR.svg', dpi=300)
fig, axs = ExplainGraph.plot_errors(explanation)
# MeLIME explanation using the KDE generator and a 'Ridge' local model.
explainer = Explainer(
    model_predict=model.predict,
    generator=generator,
    local_model='Ridge',
    feature_names=data.feature_names,
    target_names=["lenght"],
)

# Settings for explaining the instance x_explain.
explain_options = dict(
    x_explain=x_explain.reshape(1, -1),
    r=1.0,
    n_samples=1000,
    tol_importance=0.01,
    local_mini_batch_max=20,
    scale_data=False,
    weight_kernel='gaussian',
)
explanation, counterfactual_examples = explainer.explain_instance(**explain_options)

# Plot and save the explanation, then plot the errors reported for it.
fig, axs = ExplainGraph.plot(explanation.explain())
plt.savefig(f'spiral_x_{x_explain}_m-lime_kde_R.svg', dpi=300)
fig, axs = ExplainGraph.plot_errors(explanation)
Thank you!