def cluster(k, d, data, i_centers=None):
    """Find *k* clusters on *d* dimensional *data*.

    Each item ``p`` of *data* is read as ``p[0][i]`` for ``i`` in
    ``range(d)``, i.e. its first element holds the coordinates.

    If *i_centers* is given and truthy it is used as the initial centers.
    Otherwise *k* centers are drawn at random inside the per-dimension
    bounding box of *data*.

    The centers are refined with ``next_step`` until ``delta`` between two
    consecutive sets of centers is no longer positive.

    Returns a ``(clusters, score)`` tuple, where *score* is the sum of the
    Euclidean distances between every clustered point and the center it is
    paired with.
    """
    if i_centers:
        old_centers = i_centers
    else:
        # The bounding box is only needed to seed random centers, so it is
        # computed here rather than unconditionally.
        borders = [(min(p[0][i] for p in data), max(p[0][i] for p in data))
                   for i in range(d)]
        old_centers = [[(hi - lo) * random.random() + lo
                        for (lo, hi) in borders]
                       for _ in range(k)]

    clusters, centers = next_step(old_centers, data)
    while delta(old_centers, centers) > 0:
        old_centers = centers
        clusters, centers = next_step(old_centers, data)

    def dst(c, p):
        """Return the Euclidean distance between center *c* and point *p*."""
        return sum((a - b) ** 2 for (a, b) in zip(p, c)) ** 0.5

    # NOTE(review): assumes next_step returns clusters and centers in
    # matching order, so zip pairs each cluster with its own center.
    # Per-cluster sums are added up separately to keep the same floating
    # point summation order.
    score = sum(sum(dst(c, p[0]) for p in clus)
                for clus, c in zip(clusters, centers))
    return clusters, score
def next_step(centers, data):
"""Compute new clusters and centers."""