"""Example: classify data that includes a categorical feature ('color') with CatBoost.

Trains the model on a small in-memory dataset, then prints the test accuracy
and the predictions.
"""
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
# Sample data with categorical and numeric features: 'color' is categorical,
# 'size' and 'weight' are numeric, and 'label' is the binary target.
data = pd.DataFrame({
    'color': ['red', 'green', 'blue', 'green', 'red', 'blue', 'green', 'red'],
    'size': [1, 2, 3, 2, 1, 3, 2, 1],
    'weight': [10, 20, 30, 20, 10, 30, 20, 10],
    'label': [0, 1, 0, 1, 0, 0, 1, 0],
})

# Features and target.
# .copy() makes X an independent frame, so the column assignment below does
# not write into a selection of `data` (which triggers pandas'
# SettingWithCopyWarning on versions without Copy-on-Write).
X = data[['color', 'size', 'weight']].copy()
y = data['label']

# Convert categorical feature to category dtype
X['color'] = X['color'].astype('category')

# Split data: hold out 25% of the rows for evaluation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Categorical features, identified by column name rather than position so the
# declaration stays correct if the feature columns are ever reordered.
cat_features = ['color']

# Create and train model (verbose=0 silences the per-iteration training log)
model = CatBoostClassifier(iterations=50, learning_rate=0.1, depth=4, verbose=0)
model.fit(X_train, y_train, cat_features=cat_features)

# Predict on the held-out rows
preds = model.predict(X_test)

# Accuracy on the held-out rows
acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc:.2f}")
print(f"Predictions: {preds.tolist()}")