X-Git-Url: http://git.tomasm.cz/imago.git/blobdiff_plain/841662dc25b40dce151a0bf7a024e55682028f18..966b2dcf6c734ffb729e2f8bab000d43586cf7eb:/src/k_means.py diff --git a/src/k_means.py b/src/k_means.py index f429824..5260601 100644 --- a/src/k_means.py +++ b/src/k_means.py @@ -4,19 +4,21 @@ import random def cluster(k, d, data, i_centers=None): """Find *k* clusters on *d* dimensional *data*.""" - borders = [(min(p[0][i] for p in data), max(p[0][i] for p in data)) - for i in range(d) ] if i_centers: old_centers = i_centers else: + borders = [(min(p[0][i] for p in data), max(p[0][i] for p in data)) + for i in range(d)] old_centers = [[(h - l) * random.random() + l for (l, h) in borders] for _ in range(k)] clusters, centers = next_step(old_centers, data) while delta(old_centers, centers) > 0: old_centers = centers clusters, centers = next_step(old_centers, data) - - return clusters + dst = lambda c, p: sum((a - b) ** 2 for (a, b) in zip(p, c)) ** 0.5 + score = sum([sum(map(lambda p: dst(c, p[0]), clus)) for clus, c in + zip(clusters, centers)]) + return clusters, score def next_step(centers, data): """Compute new clusters and centers."""