Introduction
Cluster evaluation metrics help us check how well our groups (clusters) of data points are formed. They tell us if the clusters are tight and well separated.
Jump into concepts and practice - no test required
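metric_name(true_labels, predicted_labels)
# Note: only metrics that compare against known labels take true_labels;
# the others below take the data itself as the first argument.
# Example metrics:
# - adjusted_rand_score(true_labels, predicted_labels)
# - silhouette_score(data, predicted_labels)
# - calinski_harabasz_score(data, predicted_labels)
# - davies_bouldin_score(data, predicted_labels)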
# Example: score a predicted clustering against known labels with the
# Adjusted Rand Index.
from sklearn.metrics import adjusted_rand_score

# Reference grouping and the grouping to be evaluated, one label per sample.
true_labels = [0, 0, 1, 1, 2, 2]
predicted_labels = [0, 0, 1, 2, 2, 2]

# Per the scikit-learn docs, identical partitions score 1.0 and random
# labelings score close to 0.0.
score = adjusted_rand_score(true_labels, predicted_labels)
print(score)
# Example: silhouette score, which needs only the data and the assigned
# cluster labels (no ground truth).
import numpy as np
from sklearn.metrics import silhouette_score

# Six 2-D points: three with x = 1 and three with x = 10.
data = np.array(
    [
        [1, 2],
        [1, 4],
        [1, 0],
        [10, 2],
        [10, 4],
        [10, 0],
    ]
)

# One cluster id per row of `data`.
predicted_labels = [0, 0, 0, 1, 1, 1]

score = silhouette_score(data, predicted_labels)
print(score)
# Example: cluster a small data set with KMeans, then report three
# label-free evaluation metrics for the resulting clustering.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import (
    calinski_harabasz_score,
    davies_bouldin_score,
    silhouette_score,
)

# Sample data: 2 groups of points
X = np.array(
    [
        [1, 2],
        [1, 4],
        [1, 0],
        [10, 2],
        [10, 4],
        [10, 0],
    ]
)

# Create KMeans clustering with 2 clusters.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), and leaving it unset emits a FutureWarning on
# the releases around that change. random_state keeps the run repeatable.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans.fit(X)
labels = kmeans.labels_

# Calculate cluster evaluation metrics.
# Per the scikit-learn docs: higher is better for Silhouette and
# Calinski-Harabasz; lower is better for Davies-Bouldin.
sil_score = silhouette_score(X, labels)
calinski_score = calinski_harabasz_score(X, labels)
davies_score = davies_bouldin_score(X, labels)

print(f"Silhouette Score: {sil_score:.3f}")
print(f"Calinski-Harabasz Score: {calinski_score:.3f}")
print(f"Davies-Bouldin Score: {davies_score:.3f}")
# Exercise prompt (its beginning is missing from the page text):
#   "... X and cluster labels labels?"
# NOTE(review): the prompt was fused onto the import line; restore the full
# question wording from the original page if it is available.
from sklearn.metrics import davies_bouldin_score

# Four 2-D samples and one cluster id per sample.
X = [[1, 2], [2, 1], [10, 10], [11, 11]]
labels = [0, 0, 1, 1]

score = davies_bouldin_score(X, labels)
# The printed value is rounded to two decimal places.
print(round(score, 2))
from sklearn.metrics import silhouette_score

# Four 2-D samples.
X = [[1, 2], [2, 1], [10, 10], [11, 11]]

# NOTE(review): only three labels are given for four samples. scikit-learn
# checks that X and labels have the same number of entries, so the call
# below is expected to raise ValueError instead of returning a score.
# Presumably this mismatch is the point of the exercise; confirm against
# the original page before changing the list.
labels = [0, 0, 1]

score = silhouette_score(X, labels)
print(score)