Introduction
Multi-label classification predicts every label that applies to a sample when a single sample can belong to several categories at the same time - for example, a news article tagged as both "politics" and "economy".
Jump into concepts and practice - no test required
# Generic multi-label workflow: build a model, fit it on the training data,
# then predict labels for unseen samples.
# NOTE: SomeMultiLabelModel is not defined in this snippet - it stands in for
# a concrete estimator. X_train, Y_train and X_test must also be supplied by
# the surrounding code.
model = SomeMultiLabelModel()
model.fit(X_train, Y_train)
predictions = model.predict(X_test)
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier

# MultiOutputClassifier wraps the base estimator handed to it; per the
# scikit-learn documentation it fits one classifier per target column.
# X_train, Y_train and X_test must be supplied by the surrounding code.
model = MultiOutputClassifier(LogisticRegression())
model.fit(X_train, Y_train)
predictions = model.predict(X_test)
import tensorflow as tf

# Small feed-forward network for multi-label output.
# num_labels, X_train, Y_train and X_test must be supplied by the
# surrounding code.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    # One sigmoid unit per label: each output is squashed into (0, 1)
    # independently of the others, so several labels can be "on" at once.
    tf.keras.layers.Dense(num_labels, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=5)

# These are the raw sigmoid outputs, not 0/1 labels; apply a threshold
# (e.g. 0.5) to turn them into binary label assignments.
predictions = model.predict(X_test)
import numpy as np
from sklearn.metrics import accuracy_score, hamming_loss
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.tree import DecisionTreeClassifier

# Seeded generator so the synthetic data - and therefore the printed
# metrics - are identical on every run, in line with the fixed random_state
# already used for the split and the tree below.
rng = np.random.default_rng(42)

# Create sample data: 100 samples, 5 features
X = rng.random((100, 5))

# Create multi-label targets: 3 labels per sample
# Each label is 0 or 1 randomly
Y = rng.integers(0, 2, size=(100, 3))

# Split data
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Create multi-label model
model = MultiOutputClassifier(DecisionTreeClassifier(random_state=42))

# Train model
model.fit(X_train, Y_train)

# Predict
Y_pred = model.predict(X_test)

# Calculate accuracy per label (one score per column of the label matrix)
acc = [
    accuracy_score(Y_test[:, i], Y_pred[:, i])
    for i in range(Y_test.shape[1])
]

# Calculate Hamming loss (fraction of wrong labels)
hloss = hamming_loss(Y_test, Y_pred)

print(f"Accuracy per label: {acc}")
print(f"Hamming loss: {hloss:.3f}")
import numpy as np

# Example scores: 2 rows with 3 values each.
preds = np.array([[0.8, 0.1, 0.6], [0.3, 0.7, 0.2]])
threshold = 0.5

# Strict comparison: a score exactly equal to the threshold maps to 0.
# The boolean mask is cast to int so the result is a 0/1 matrix.
binary_preds = (preds > threshold).astype(int)
print(binary_preds)