Introduction
Saving pipelines lets you keep your trained machine learning steps so you can use them later without retraining.
Jump into concepts and practice - no test required
# Syntax: persist a fitted pipeline with joblib and restore it later.
# `pipeline` stands for any already-fitted pipeline object.
import joblib

# Save pipeline
joblib.dump(pipeline, 'pipeline_filename.joblib')

# Load pipeline
pipeline = joblib.load('pipeline_filename.joblib')
import joblib

# Write the fitted pipeline object to 'model.joblib'.
joblib.dump(my_pipeline, 'model.joblib')
import joblib

# Read the pipeline back from 'model.joblib'.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_pipeline = joblib.load('model.joblib')
import pickle

# pickle.dump needs a file object opened in binary write mode ('wb').
with open('pipeline.pkl', 'wb') as f:
    pickle.dump(my_pipeline, f)
import pickle

# pickle.load needs a file object opened in binary read mode ('rb').
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('pipeline.pkl', 'rb') as f:
    loaded_pipeline = pickle.load(f)
# Complete example: train a pipeline on the iris data, save it with joblib,
# load it back, and evaluate the loaded copy on the held-out split.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import joblib

# Load data
iris = load_iris()
X, y = iris.data, iris.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=200)),
])

# Train pipeline
pipeline.fit(X_train, y_train)

# Save pipeline
joblib.dump(pipeline, 'iris_pipeline.joblib')

# Load pipeline
loaded_pipeline = joblib.load('iris_pipeline.joblib')

# Predict with loaded pipeline
predictions = loaded_pipeline.predict(X_test)

# Calculate accuracy
accuracy = loaded_pipeline.score(X_test, y_test)

print(f'Predictions: {predictions}')
print(f'Accuracy: {accuracy:.2f}')
[…] joblib or pickle?
[…] pipe to a file called model.pkl using joblib?
The joblib function is dump(), not save, write, or store.
Use joblib.dump(pipe, 'model.pkl') to save the pipeline to a file.
import joblib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Two-step pipeline: a StandardScaler followed by a LogisticRegression classifier.
pipe = Pipeline([
('scaler', StandardScaler()),
('clf', LogisticRegression())
])
# Fit on two training samples labelled 0 and 1.
pipe.fit([[0, 0], [1, 1]], [0, 1])
# Round-trip the fitted pipeline through the file 'pipe.pkl'.
joblib.dump(pipe, 'pipe.pkl')
loaded_pipe = joblib.load('pipe.pkl')
# Predict with the reloaded pipeline on a new sample.
pred = loaded_pipe.predict([[2, 2]])
print(pred)
[…] loaded_pipe = joblib.load('pipeline.pkl') but got a FileNotFoundError. What is the most likely cause?
The file pipeline.pkl is not found in the current directory.
[…] [[5, 5]]?
[…] joblib.dump() correctly to save the pipeline, and joblib.load() to load it.
[…] predict on new data correctly and prints the result.
[…] pickle.load to save;
import joblib
# NOTE: intentionally incorrect quiz option. The accompanying text states that
# joblib.save does not exist; joblib.dump is the call that saves.
joblib.save(pipeline, 'model.pkl')
loaded = joblib.load('model.pkl')
pred = loaded.predict([[5, 5]])
print(pred)
(The snippet above uses the non-existent joblib.save.)
import pickle
# NOTE: intentionally incorrect quiz option. pickle.dump and pickle.load are
# given a filename string here; per the accompanying text, both require a file
# object from open() in 'wb'/'rb' mode.
pickle.dump(pipeline, 'model.pkl')
loaded = pickle.load('model.pkl')
pred = loaded.predict([[5, 5]])
print(pred)
(The snippet above incorrectly uses pickle.dump and pickle.load; both require file objects from open() with 'wb'/'rb' modes.)