Introduction
CatBoost is a tool that helps computers learn from data to make good guesses, especially when the data has categories like colors or types.
Jump into concepts and practice - no test required
# Basic usage: configure a classifier, train it, then predict.
# X_train, y_train, X_test and cat_features are expected to be defined by
# the surrounding context; they are not created in this snippet.
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=6)

# cat_features tells CatBoost which columns to treat as categorical.
model.fit(X_train, y_train, cat_features=cat_features)

predictions = model.predict(X_test)
# Minimal usage: only the iteration count is set and no categorical columns
# are declared. X_train, y_train and X_test come from the surrounding context.
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=50)
model.fit(X_train, y_train)

predictions = model.predict(X_test)
# Variant with more iterations, a smaller learning rate and deeper trees.
# CatBoostClassifier, X_train and y_train come from the surrounding context.
model = CatBoostClassifier(iterations=200, learning_rate=0.05, depth=8)

# Categorical columns are given by position here: columns 0 and 2.
model.fit(X_train, y_train, cat_features=[0, 2])
# Snippet: default-parameter model; verbose=0 is passed to fit().
# X_train, y_train and cat_features come from the surrounding context.
model = CatBoostClassifier()
model.fit(X_train, y_train, cat_features=cat_features, verbose=0)


# Complete example: train and evaluate a classifier on a tiny DataFrame
# that mixes one categorical column with two numeric columns.
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Sample data with categorical and numeric features
data = pd.DataFrame({
    'color': ['red', 'green', 'blue', 'green', 'red', 'blue', 'green', 'red'],
    'size': [1, 2, 3, 2, 1, 3, 2, 1],
    'weight': [10, 20, 30, 20, 10, 30, 20, 10],
    'label': [0, 1, 0, 1, 0, 0, 1, 0],
})

# Features and target.
# .copy() makes X an independent frame, so the dtype conversion below does
# not write into a slice of `data` (avoids pandas' SettingWithCopyWarning).
X = data[['color', 'size', 'weight']].copy()
y = data['label']

# Convert categorical feature to category dtype
X['color'] = X['color'].astype('category')

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Indices of categorical features ('color' is column 0 of X)
cat_features = [0]

# Create and train model
model = CatBoostClassifier(
    iterations=50, learning_rate=0.1, depth=4, verbose=0
)
model.fit(X_train, y_train, cat_features=cat_features)

# Predict
preds = model.predict(X_test)

# Accuracy
acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc:.2f}")
print(f"Predictions: {preds.tolist()}")
# ... CatBoost in machine learning?
from catboost import CatBoostClassifier

# Tiny dataset: column 0 is numeric, column 1 holds string categories.
X = [[1, 'red'], [2, 'blue'], [3, 'green']]
y = [0, 1, 0]

model = CatBoostClassifier(iterations=10, verbose=False)

# Column index 1 is declared categorical so the string values are accepted.
model.fit(X, y, cat_features=[1])

preds = model.predict([[2, 'red']])
print(preds.tolist())
from catboost import CatBoostClassifier

# Column 1 holds string values.
X = [[1, 'red'], [2, 'blue'], [3, 'green']]
y = [0, 1, 0]

model = CatBoostClassifier(iterations=10)

# NOTE(review): fit() is called without cat_features even though column 1
# contains strings. This looks intentional for the exercise; confirm before
# changing it.
model.fit(X, y)
… `cat_features` so it can handle them internally.
… `cat_features` prevents CatBoost from using its special handling.
… `cat_features` and use default parameters -> Option D