Introduction
Custom transformers let you create your own data processing steps to prepare data exactly how you want before training a model.
Jump into concepts and practice - no test required
from sklearn.base import BaseEstimator, TransformerMixin


class MyTransformer(BaseEstimator, TransformerMixin):
    """Skeleton of a custom transformer; copy it and fill in the three methods.

    Parameters
    ----------
    param : any, default=1
        Example hyperparameter, stored unchanged on the instance.
    """

    def __init__(self, param=1):
        # initialize parameters
        self.param = param

    def fit(self, X, y=None):
        """Return ``self``; this template learns nothing from ``X`` or ``y``."""
        # learn from data if needed
        return self

    def transform(self, X):
        """Return ``X`` unchanged; replace with the real transformation."""
        # change data and return it
        return X
from sklearn.base import BaseEstimator, TransformerMixin


class AddConstantTransformer(BaseEstimator, TransformerMixin):
    """Transformer that adds a fixed constant to its input.

    Parameters
    ----------
    constant : number, default=1
        Value added to ``X`` by :meth:`transform`.
    """

    def __init__(self, constant=1):
        self.constant = constant

    def fit(self, X, y=None):
        """Return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X + constant``.

        ``X`` must support ``+`` with ``constant`` (for example a NumPy
        array, where the addition is applied element-wise).
        """
        return X + self.constant
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np


class LogTransformer(BaseEstimator, TransformerMixin):
    """Transformer that applies ``numpy.log1p`` to its input."""

    def fit(self, X, y=None):
        """Return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``np.log1p(X)``, i.e. ``log(1 + x)`` for each element."""
        return np.log1p(X)
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np


# Custom transformer that squares the input features
class SquareTransformer(BaseEstimator, TransformerMixin):
    """Transformer that raises every input value to the power of two."""

    def fit(self, X, y=None):
        """Return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X ** 2``; ``X`` must support ``**`` (e.g. a NumPy array)."""
        return X ** 2


# Create sample data
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Split data (random_state is fixed so the split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Build pipeline with custom transformer and linear regression
pipeline = Pipeline([
    ('square', SquareTransformer()),
    ('model', LinearRegression()),
])

# Train model
pipeline.fit(X_train, y_train)

# Predict on test data
predictions = pipeline.predict(X_test)

# Print predictions and score
print('Predictions:', predictions)
print('Model R^2 score:', pipeline.score(X_test, y_test))
What is the purpose of a custom transformer in machine learning pipelines?
What does `print(transformed_data)` output?
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
class AddConstant(BaseEstimator, TransformerMixin):
    """Transformer that adds a fixed constant to its input.

    Parameters
    ----------
    constant : number, default=1
        Value added to ``X`` by :meth:`transform`.
    """

    def __init__(self, constant=1):
        self.constant = constant

    def fit(self, X, y=None):
        """Return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X + constant``; ``X`` must support ``+`` with a number."""
        return X + self.constant


X = np.array([[1, 2], [3, 4]])
transformer = AddConstant(constant=5)
# fit_transform is not defined on AddConstant; it is inherited from
# TransformerMixin.
transformed_data = transformer.fit_transform(X)
print(transformed_data)

# Imports for the MultiplyTransformer example below.
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np


class MultiplyTransformer(BaseEstimator, TransformerMixin):
    """Transformer that multiplies every input value by a fixed factor.

    Parameters
    ----------
    factor : number, default=2
        Multiplier applied to each element of ``X`` by :meth:`transform`.
    """

    def __init__(self, factor=2):
        self.factor = factor

    def fit(self, X, y=None):
        """Return ``self``; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return ``X`` scaled element-wise by ``factor`` as a NumPy array."""
        # Coerce to an ndarray first: on a plain Python list, ``X * 3`` would
        # repeat the list three times instead of scaling its elements.
        return np.asarray(X) * self.factor


transformer = MultiplyTransformer(factor=3)
result = transformer.transform([1, 2, 3])
print(result)