Tracking Machine Learning experiments

SQLiteTracker provides a simple yet powerful way to track ML experiments using a SQLite database.

from sklearn_evaluation import SQLiteTracker

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
iris = load_iris(as_frame=True)
X, y = iris['data'], iris['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

models = [RandomForestRegressor(), LinearRegression(), Lasso()]
for m in models:
    model = type(m).__name__
    print(f'Fitting {model}')

    # .new() returns a uuid and creates an entry in the db
    uuid =, y_train)
    y_pred = m.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # add data with the .update(uuid, {'param': 'value'}) method
    tracker.update(uuid, {'mse': mse, 'model': model, **m.get_params()})
Fitting RandomForestRegressor
Fitting LinearRegression
Fitting Lasso

Or use .insert(uuid, params) to supply your own ID:

svr = SVR(), y_train)
y_pred = svr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

tracker.insert('my_uuid', {'mse': mse, 'model': type(svr).__name__, **svr.get_params()})

tracker shows last experiments by default:



uuid created parameters comment
560751f2aef34a87aee50cec0ca7ac1c2022-04-16 14:41:57{"mse": 0.042600341137617896, "model": "LinearRegression", "copy_X": true, "fit_intercept": true, "n_jobs": null, "normalize": "deprecated", "positive": false}
2e5ecf364aa94c4da36acc8b123294312022-04-16 14:41:57{"mse": 0.4317655183287654, "model": "Lasso", "alpha": 1.0, "copy_X": true, "fit_intercept": true, "max_iter": 1000, "normalize": "deprecated", "positive": false, "precompute": false, "random_state": null, "selection": "cyclic", "tol": 0.0001, "warm_start": false}
my_uuid 2022-04-16 14:41:57{"mse": 0.03041912541362143, "model": "SVR", "C": 1.0, "cache_size": 200, "coef0": 0.0, "degree": 3, "epsilon": 0.1, "gamma": "scale", "kernel": "rbf", "max_iter": -1, "shrinking": true, "tol": 0.001, "verbose": false}
1ad8f65486dd4ba3b899e00c97a4a72d2022-04-16 14:41:56{"mse": 0.009877999999999998, "model": "RandomForestRegressor", "bootstrap": true, "ccp_alpha": 0.0, "criterion": "squared_error", "max_depth": null, "max_features": "auto", "max_leaf_nodes": null, "max_samples": null, "min_impurity_decrease": 0.0, "min_samples_leaf": 1, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "n_estimators": 100, "n_jobs": null, "oob_score": false, "random_state": null, "verbose": 0, "warm_start": false}

(Most recent experiments)

Querying experiments

ordered = tracker.query("""
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.mse') AS mse
FROM experiments
ORDER BY json_extract(parameters, '$.mse') ASC
model mse
1ad8f65486dd4ba3b899e00c97a4a72d RandomForestRegressor 0.009878
my_uuid SVR 0.030419
560751f2aef34a87aee50cec0ca7ac1c LinearRegression 0.042600
2e5ecf364aa94c4da36acc8b12329431 Lasso 0.431766

The query method returns a data frame with “uuid” as the index:


Adding comments

tracker.comment(ordered.index[0], 'Best performing experiment')

User tracker[uuid] to get a single experiment:

created parameters comment
1ad8f65486dd4ba3b899e00c97a4a72d 2022-04-16 14:41:56 {"mse": 0.009877999999999998, "model": "Random... Best performing experiment

Getting recent experiments

The recent method also returns a data frame:

df = tracker.recent()
created parameters comment
560751f2aef34a87aee50cec0ca7ac1c 2022-04-16 14:41:57 {"mse": 0.042600341137617896, "model": "Linear... None
2e5ecf364aa94c4da36acc8b12329431 2022-04-16 14:41:57 {"mse": 0.4317655183287654, "model": "Lasso", ... None
my_uuid 2022-04-16 14:41:57 {"mse": 0.03041912541362143, "model": "SVR", "... None
1ad8f65486dd4ba3b899e00c97a4a72d 2022-04-16 14:41:56 {"mse": 0.009877999999999998, "model": "Random... Best performing experiment

Pass normalize=True to convert the nested JSON dictionary into columns:

df = tracker.recent(normalize=True)
created mse model copy_X fit_intercept n_jobs normalize positive alpha max_iter ... max_features max_leaf_nodes max_samples min_impurity_decrease min_samples_leaf min_samples_split min_weight_fraction_leaf n_estimators oob_score comment
560751f2aef34a87aee50cec0ca7ac1c 2022-04-16 14:41:57 0.042600 LinearRegression True True NaN deprecated False NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None
2e5ecf364aa94c4da36acc8b12329431 2022-04-16 14:41:57 0.431766 Lasso True True NaN deprecated False 1.0 1000.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None
my_uuid 2022-04-16 14:41:57 0.030419 SVR NaN NaN NaN NaN NaN NaN -1.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None
1ad8f65486dd4ba3b899e00c97a4a72d 2022-04-16 14:41:56 0.009878 RandomForestRegressor NaN NaN NaN NaN NaN NaN NaN ... auto NaN NaN 0.0 1.0 2.0 0.0 100.0 False Best performing experiment

4 rows × 38 columns

# delete our example database
from pathlib import Path
[ ]: