# This example shows how to remove names from a dataset before training a simple model, in order to protect privacy.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Sample data with personal info. 'name' is a direct identifier; 'age' and
# 'income' are the model features and 'purchased' is the binary label.
raw_data = pd.DataFrame({
    'age': [25, 32, 47, 51],
    'income': [50000, 60000, 80000, 90000],
    'name': ['Alice', 'Bob', 'Carol', 'Dave'],
    'purchased': [0, 1, 0, 1],
})

# Privacy step: remove personal identifiers so no later step (splitting,
# training, printing) ever sees them.
clean_data = raw_data.drop(columns=['name'])

# Prepare data: every remaining column except the label is a feature.
X = clean_data.drop(columns=['purchased'])
y = clean_data['purchased']

# Stratify on the label. With only four rows, an unstratified 50/50 split can
# put both rows of the same class into the training half, and
# LogisticRegression.fit raises ValueError when the training labels contain a
# single class. Stratifying guarantees one row of each class on each side.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42, stratify=y
)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the held-out half and check accuracy against the true labels.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Predictions: {predictions}")
print(f"Accuracy: {accuracy:.2f}")