From: Tomas Musil Date: Tue, 24 Jun 2014 13:13:30 +0000 (+0200) Subject: comments for k_means X-Git-Url: http://git.tomasm.cz/imago.git/commitdiff_plain/dfd21c7829d456a1c05db4fb0ef10c5994e2469a comments for k_means --- diff --git a/imago_pack/k_means.py b/imago_pack/k_means.py index c5ddba5..f429824 100644 --- a/imago_pack/k_means.py +++ b/imago_pack/k_means.py @@ -19,6 +19,7 @@ def cluster(k, d, data, i_centers=None): return clusters def next_step(centers, data): + """Compute new clusters and centers.""" clusters = [[] for _ in centers] for point in data: clusters[nearest(centers, point)].append(point) @@ -26,12 +27,16 @@ def next_step(centers, data): return clusters, centers def nearest(centers, point): + """Find the nearest cluster *center* for *point*.""" d, i = min(((sum((p - c) ** 2 for (p, c) in zip(point[0], center)) ** 0.5 , index) if center else (float('inf'), len(centers))) for (index, center) in enumerate(centers)) return i def centroid(cluster): + """Find the centroid of the *cluster*.""" + # TODO is this just a mean of coordinates? + # TODO should we try different definitions? l = float(len(cluster)) try: d = len(cluster[0][0]) #TODO empty cluster error @@ -40,5 +45,7 @@ def centroid(cluster): return [sum(c[0][i] for c in cluster) / l for i in range(d)] def delta(c1, c2): + """Find the absolute distance between two lists of points.""" + # TODO rewrite this to a sane form return sum((sum(abs(cc1 - cc2) for (cc1, cc2) in zip (ccc1, ccc2)) if ccc2 else 0.) for (ccc1, ccc2) in zip(c1, c2))