plot_model
This function analyzes the performance of a trained model on the hold-out set. It may require re-training the model in certain cases.
Example
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
lr = create_model ( 'lr' )
# plot model
plot_model (lr, plot = 'auc' )
Change the scale
The resolution scale of the figure can be changed with the scale
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
lr = create_model ( 'lr' )
# plot model
plot_model (lr, plot = 'auc' , scale = 3 )
Save the plot
You can save the plot as a png
file using the save
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
lr = create_model ( 'lr' )
# plot model
plot_model (lr, plot = 'auc' , save = True )
Customize the plot
PyCaret uses Yellowbrick for most of the plotting work. Any argument that is accepted by Yellowbrick visualizers can be passed through the plot_kwargs
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
lr = create_model ( 'lr' )
# plot model
plot_model (lr, plot = 'confusion_matrix' , plot_kwargs = { 'percent' : True })
Before Customization After Customization
Use train data
If you want to assess the model plot on the train data, you can pass use_train_data=True
to the plot_model
function.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
lr = create_model ( 'lr' )
# plot model
plot_model (lr, plot = 'auc' , use_train_data = True )
Plot on train data vs. hold-out data
Examples by module
Classification
Recursive Feature Selection
Feature Importance (Top 10)
Feature Importance (all)
auc confusion_matrix threshold pr error class_report rfe learning vc
feature manifold calibration dimension boundary lift gain ks parameter
Regression
Recursive Feature Selection
Feature Importance (top 10)
residuals error cooks rfe feature learning vc manifold
Clustering
cluster tsne elbow silhouette distance distribution
Anomaly Detection
t-SNE (3d) Dimension Plot
evaluate_model
The evaluate_model
function displays a user interface for analyzing the performance of a trained model. It calls the plot_model function internally.
Copy # load dataset
from pycaret . datasets import get_data
juice = get_data ( 'juice' )
# init setup
from pycaret . classification import *
exp_name = setup (data = juice, target = 'Purchase' )
# create model
lr = create_model ( 'lr' )
# launch evaluate widget
evaluate_model (lr)
NOTE: This function only works in Jupyter Notebook or an equivalent environment.
interpret_model
This function analyzes the predictions generated from a trained model. Most plots in this function are implemented using SHAP (SHapley Additive exPlanations). For more information, please see https://shap.readthedocs.io/en/latest/
Example
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost)
Save the plot
You can save the plot as a png
file using the save
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, save = True )
NOTE: When save=True
no plot is displayed in the Notebook.
Change plot type
There are a few different plot types available that can be changed by the plot
parameter.
Correlation
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'correlation' )
By default, PyCaret uses the first feature in the dataset, but this can be changed using the feature
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'correlation' , feature = 'Age (years)' )
Partial Dependence Plot
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'pdp' )
By default, PyCaret uses the first available feature in the dataset but this can be changed using the feature
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'pdp' , feature = 'Age (years)' )
Morris Sensitivity Analysis
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'msa' )
Permutation Feature Importance
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'pfi' )
Reason Plot
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'reason' )
When you generate the reason
plot without passing a specific index from the test data, an interactive plot is displayed that lets you select the x-axis and y-axis. This is only possible if you are using Jupyter Notebook or an equivalent environment. If you want to see this plot for a specific observation, pass its index in the observation
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, plot = 'reason' , observation = 1 )
Here the observation = 1
means index 1 from the test set.
Use train data
By default, all the plots are generated on the test dataset. If you want to generate plots using the train dataset (not recommended), you can use the use_train_data
parameter.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# creating a model
xgboost = create_model ( 'xgboost' )
# interpret model
interpret_model (xgboost, use_train_data = True )
dashboard
The dashboard
function generates an interactive dashboard for a trained model. The dashboard is implemented using ExplainerDashboard (explainerdashboard.readthedocs.io).
Dashboard Example
Copy # load dataset
from pycaret . datasets import get_data
juice = get_data ( 'juice' )
# init setup
from pycaret . classification import *
exp_name = setup (data = juice, target = 'Purchase' )
# train model
lr = create_model ( 'lr' )
# launch dashboard
dashboard (lr)
Video:
check_fairness
There are many approaches to conceptualizing fairness. The check_fairness
function follows the approach known as group fairness, which asks: which groups of individuals are at risk of experiencing harm? check_fairness
provides fairness-related metrics between different groups (also called sub-populations).
Check Fairness Example
Copy # load dataset
from pycaret . datasets import get_data
income = get_data ( 'income' )
# init setup
from pycaret . classification import *
exp_name = setup (data = income, target = 'income >50K' )
# train model
lr = create_model ( 'lr' )
# check model fairness
lr_fairness = check_fairness (lr, sensitive_features = [ 'sex' , 'race' ])
Video:
get_leaderboard
This function returns the leaderboard of all models trained in the current setup.
Copy # load dataset
from pycaret . datasets import get_data
diabetes = get_data ( 'diabetes' )
# init setup
from pycaret . classification import *
clf1 = setup (data = diabetes, target = 'Class variable' )
# compare models
top3 = compare_models (n_select = 3 )
# tune top 3 models
tuned_top3 = [ tune_model (i) for i in top3]
# ensemble top 3 tuned models
ensembled_top3 = [ ensemble_model (i) for i in tuned_top3]
# blender
blender = blend_models (tuned_top3)
# stacker
stacker = stack_models (tuned_top3)
# check leaderboard
get_leaderboard ()
You can also access the trained Pipeline of any model from the returned leaderboard.
Copy # check leaderboard
lb = get_leaderboard ()
# select top model
lb . iloc [ 0 ] [ 'Model' ]
assign_model
This function assigns labels to the training dataset using the trained model. It is available for the Clustering, Anomaly Detection, and NLP modules.
Clustering
Copy # load dataset
from pycaret . datasets import get_data
jewellery = get_data ( 'jewellery' )
# init setup
from pycaret . clustering import *
clu1 = setup (data = jewellery)
# train a model
kmeans = create_model ( 'kmeans' )
# assign model
assign_model (kmeans)
Anomaly Detection
Copy # load dataset
from pycaret . datasets import get_data
anomaly = get_data ( 'anomaly' )
# init setup
from pycaret . anomaly import *
ano1 = setup (data = anomaly)
# train a model
iforest = create_model ( 'iforest' )
# assign model
assign_model (iforest)