This code illustrates the bias–variance trade-off by training two decision tree regressors on the same noisy synthetic data: a depth-1 stump and a depth-20 tree. The simple model underfits and exhibits high bias (it errs because it is too simple to capture the signal), while the complex model overfits the training noise and exhibits high variance (it errs because it memorizes the training set). Each model's test-set mean squared error is printed for comparison.
from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Bias-variance demo: a depth-1 stump underfits (high bias), while a
# depth-20 tree overfits the noise in only 70 training rows (high variance).

# Synthetic 1-D regression problem with Gaussian noise; fixed seed for
# reproducible data and therefore reproducible MSE values.
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Underfitting model: a single split (decision stump) cannot track the signal.
simple_model = DecisionTreeRegressor(max_depth=1, random_state=42).fit(X_train, y_train)
y_pred_simple = simple_model.predict(X_test)
simple_mse = mean_squared_error(y_test, y_pred_simple)

# Overfitting model: depth 20 is effectively unconstrained for this data size,
# so the tree memorizes training noise and generalizes poorly.
complex_model = DecisionTreeRegressor(max_depth=20, random_state=42).fit(X_train, y_train)
y_pred_complex = complex_model.predict(X_test)
complex_mse = mean_squared_error(y_test, y_pred_complex)

# Report held-out error for each model.
print(f"Simple model MSE (high bias): {simple_mse:.2f}")
print(f"Complex model MSE (high variance): {complex_mse:.2f}")