import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# Simulated word embeddings: one 50-dimensional vector per word. Words that
# are meant to land near each other share the same mean offset.
words = ['cat', 'dog', 'apple', 'orange', 'car']
cluster_offsets = np.array([
    1.0,   # cat
    1.0,   # dog
    -1.0,  # apple
    -1.0,  # orange
    3.0,   # car
])

# Seed the data generator as well as t-SNE; seeding only t-SNE (below) still
# produced a different figure on every run because the inputs changed.
rng = np.random.default_rng(42)
embeddings = rng.normal(0, 1, (len(words), 50)) + cluster_offsets[:, np.newaxis]

# Normalize every embedding (row) to unit L2 length.
embeddings_norm = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

# Apply t-SNE. scikit-learn requires perplexity < n_samples, so the default
# of 30 raises ValueError for this 5-point data set. Scale the perplexity with
# the sample count, capped at the library default for larger inputs.
perplexity = min(30.0, (len(words) - 1) / 2)
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
embeddings_2d = tsne.fit_transform(embeddings_norm)

# Plot the 2-D projection and label each point with its word.
plt.figure(figsize=(8, 6))
plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], color='blue')
for i, word in enumerate(words):
    # Small offset so the label does not sit directly on top of the marker.
    plt.text(embeddings_2d[i, 0] + 0.01, embeddings_2d[i, 1] + 0.01, word, fontsize=12)
plt.title('t-SNE visualization of word embeddings')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.grid(True)
plt.show()