projekty
/
imago.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
8ad37ef
)
comments for k_means
author
Tomas Musil
<tomik.musil@gmail.com>
Tue, 24 Jun 2014 13:13:30 +0000
(15:13 +0200)
committer
Tomas Musil
<tomik.musil@gmail.com>
Tue, 24 Jun 2014 13:13:30 +0000
(15:13 +0200)
imago_pack/k_means.py
patch
|
blob
|
history
diff --git
a/imago_pack/k_means.py
b/imago_pack/k_means.py
index
c5ddba5
..
f429824
100644
(file)
--- a/
imago_pack/k_means.py
+++ b/
imago_pack/k_means.py
@@
-19,6
+19,7
@@
def cluster(k, d, data, i_centers=None):
return clusters
def next_step(centers, data):
return clusters
def next_step(centers, data):
+ """Compute new clusters and centers."""
clusters = [[] for _ in centers]
for point in data:
clusters[nearest(centers, point)].append(point)
clusters = [[] for _ in centers]
for point in data:
clusters[nearest(centers, point)].append(point)
@@
-26,12
+27,16
@@
def next_step(centers, data):
return clusters, centers
def nearest(centers, point):
return clusters, centers
def nearest(centers, point):
+ """Find the nearest cluster *center* for *point*."""
d, i = min(((sum((p - c) ** 2 for (p, c) in zip(point[0], center)) ** 0.5 ,
index) if center else (float('inf'), len(centers)))
for (index, center) in enumerate(centers))
return i
def centroid(cluster):
d, i = min(((sum((p - c) ** 2 for (p, c) in zip(point[0], center)) ** 0.5 ,
index) if center else (float('inf'), len(centers)))
for (index, center) in enumerate(centers))
return i
def centroid(cluster):
+ """Find the centroid of the *cluster*."""
+ # TODO is this just a mean of coordinates?
+ # TODO should we try different definitions?
l = float(len(cluster))
try:
d = len(cluster[0][0]) #TODO empty cluster error
l = float(len(cluster))
try:
d = len(cluster[0][0]) #TODO empty cluster error
@@
-40,5
+45,7
@@
def centroid(cluster):
return [sum(c[0][i] for c in cluster) / l for i in range(d)]
def delta(c1, c2):
return [sum(c[0][i] for c in cluster) / l for i in range(d)]
def delta(c1, c2):
+ """Find the absolute distance between two lists of points."""
+ # TODO rewrite this to a sane form
return sum((sum(abs(cc1 - cc2) for (cc1, cc2) in zip (ccc1, ccc2)) if ccc2
else 0.) for (ccc1, ccc2) in zip(c1, c2))
return sum((sum(abs(cc1 - cc2) for (cc1, cc2) in zip (ccc1, ccc2)) if ccc2
else 0.) for (ccc1, ccc2) in zip(c1, c2))