""" Common utilities for testing clustering. """ import numpy as np ############################################################################### # Generate sample data def generate_clustered_data(seed=0, n_clusters=3, n_features=2, n_samples_per_cluster=20, std=.4): prng = np.random.RandomState(seed) # the data is voluntary shifted away from zero to check clustering # algorithm robustness with regards to non centered data means = np.array([[1, 1, 1, 0], [-1, -1, 0, 1], [1, -1, 1, 1], [-1, 1, 1, 0], ]) + 10 X = np.empty((0, n_features)) for i in range(n_clusters): X = np.r_[X, means[i][:n_features] + std * prng.randn(n_samples_per_cluster, n_features)] return X