"""Compare three regressors on synthetic 1-D data with a quadratic component.

Fits a plain linear baseline, a degree-2 polynomial model with Ridge
regularization, and a depth-limited decision tree, then reports train and
validation R^2 for each so under-/over-fitting can be compared side by side.
"""
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor

# Synthetic data simulating house prices: one feature, Gaussian noise,
# fixed seed for reproducibility.
X, y = make_regression(n_samples=200, n_features=1, noise=15, random_state=42)
# Inject a quadratic term so the true relationship is non-linear; the plain
# linear baseline cannot capture this, which is the point of the comparison.
y = y + 0.5 * (X[:, 0] ** 2)

# 70/30 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

# --- Linear regression baseline -------------------------------------------
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
train_r2_lin = r2_score(y_train, lin_reg.predict(X_train))
val_r2_lin = r2_score(y_val, lin_reg.predict(X_val))

# --- Degree-2 polynomial features + Ridge regularization ------------------
# fit_transform on the training set only; the validation set reuses the
# fitted transformer so no validation statistics leak into training.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_val_poly = poly.transform(X_val)
ridge_reg = Ridge(alpha=1.0)
ridge_reg.fit(X_train_poly, y_train)
train_r2_poly = r2_score(y_train, ridge_reg.predict(X_train_poly))
val_r2_poly = r2_score(y_val, ridge_reg.predict(X_val_poly))

# --- Decision tree regressor ----------------------------------------------
# max_depth=4 caps model complexity; fixed seed keeps tie-breaking splits
# deterministic.
tree_reg = DecisionTreeRegressor(max_depth=4, random_state=42)
tree_reg.fit(X_train, y_train)
train_r2_tree = r2_score(y_train, tree_reg.predict(X_train))
val_r2_tree = r2_score(y_val, tree_reg.predict(X_val))

# Collect train vs. validation R^2 per model: a large train/val gap signals
# overfitting, uniformly low scores signal underfitting.
results = {
    'Linear Regression': {'train_r2': train_r2_lin, 'val_r2': val_r2_lin},
    'Polynomial Ridge Regression': {'train_r2': train_r2_poly, 'val_r2': val_r2_poly},
    'Decision Tree Regression': {'train_r2': train_r2_tree, 'val_r2': val_r2_tree}
}
print(results)