import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Toy dataset of eight rows: a categorical feature ('Color'), a numeric
# feature ('Size') and a binary target ('Label').
data = pd.DataFrame(
    {
        'Color': ['Red', 'Green', 'Blue', 'Green', 'Red', 'Blue', 'Red', 'Green'],
        'Size': [1, 2, 3, 2, 1, 3, 1, 2],
        'Label': [0, 1, 0, 1, 0, 0, 0, 1],
    }
)

# Expand 'Color' into one indicator column per distinct value; the prefix
# yields the column names Color_Blue, Color_Green and Color_Red.
color_encoded = pd.get_dummies(data['Color'], prefix='Color')

# Swap the raw 'Color' column for its indicator columns. Both frames carry
# the same row index, so the index-based join lines them up row for row.
data_encoded = data.drop(columns='Color').join(color_encoded)
# Pull the target column out; every remaining column is a model input.
y = data_encoded['Label']
X = data_encoded.drop(columns='Label')

# Hold out 25% of the rows for validation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit a logistic-regression classifier with default hyperparameters
# (fit() returns the estimator itself, so the chained call is equivalent).
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out rows and report the fraction classified correctly.
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy after one-hot encoding: {accuracy * 100:.2f}%")