Introduction
A pipeline helps you put multiple steps of data processing and model training together in one easy flow. It makes your work cleaner and less error-prone.
Jump into concepts and practice - no test required
from sklearn.pipeline import Pipeline

# Syntax template: a Pipeline is built from a list of (name, object) tuples.
# transformer_or_estimator1/2 stand in for real transformer or estimator
# objects and must be replaced before this snippet can run.
pipeline = Pipeline([
    ('step_name1', transformer_or_estimator1),
    ('step_name2', transformer_or_estimator2),
    # ...
])
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Two-step pipeline: a StandardScaler step named 'scale' followed by a
# LogisticRegression step named 'model'.
pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('model', LogisticRegression()),
])
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier

# Two-step pipeline: a SimpleImputer step (strategy='mean') named 'impute'
# followed by a DecisionTreeClassifier step named 'model'.
pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('model', DecisionTreeClassifier()),
])
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data
iris = load_iris()
X, y = iris.data, iris.target

# Split data: 30% is held out for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=200)),
])

# Train model (only the training split is passed to fit)
pipeline.fit(X_train, y_train)

# Predict
y_pred = pipeline.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {accuracy:.2f}")
What is a Pipeline in scikit-learn?
What is the print(y_pred) output of the following code?
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np
# Toy dataset: three training rows with two features each, plus two rows
# to predict on.
X_train = np.array([[1, 2], [2, 3], [3, 4]])
y_train = np.array([0, 1, 0])
X_test = np.array([[1, 2], [4, 5]])

# A 'scaler' step followed by a 'model' step. fit() returns the pipeline
# itself, so training and prediction can be chained in one expression.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LogisticRegression()),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(y_pred)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Each step must be an estimator *instance*. The original passed the bare
# StandardScaler class (no parentheses), which makes pipe.fit() fail instead
# of scaling the data.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression()),
])
# X_train and y_train are the arrays defined in the preceding snippet.
pipe.fit(X_train, y_train)