From: Tomas Musil Date: Mon, 7 Jan 2013 22:15:07 +0000 (+0100) Subject: k-means clustering X-Git-Url: http://git.tomasm.cz/imago.git/commitdiff_plain/24a7e923346be5e355a7d61e642fc469310444ef?ds=sidebyside;hp=c13a5fb97c22ae952b4a99e75735f9c96efcf438 k-means clustering --- diff --git a/gridf.py b/gridf.py index 03e3014..3bb7c54 100644 --- a/gridf.py +++ b/gridf.py @@ -30,7 +30,7 @@ def job_4(x, y, w, z, im_l, v1, v2, h1, h2, dv, dh, size): h2 = (h2[0] + z * dh, h2[1] + z) return (distance(im_l, get_grid([v1, v2], [h1, h2], size), size)) -def find(lines, size, l1, l2, bounds, hough, do_something, im_h): +def find(lines, size, l1, l2, bounds, hough, show_all, do_something): l1 = line_from_angl_dist(l1, size) l2 = line_from_angl_dist(l2, size) v1 = V(*l1[0]) - V(*l1[1]) @@ -56,8 +56,8 @@ def find(lines, size, l1, l2, bounds, hough, do_something, im_h): #GaussianBlur is undocumented class, may not work in future versions of PIL im_l_s = im_l.tostring() - import time - start = time.time() + #import time + #start = time.time() f_dist = partial(job_4, im_l=im_l_s, v1=v1, v2=v2, h1=h1, h2=h2, dv=delta_v, dh=delta_h, size=size) @@ -73,7 +73,7 @@ def find(lines, size, l1, l2, bounds, hough, do_something, im_h): grid_lines = [[l2ad(l, size) for l in grid[0]], [l2ad(l, size) for l in grid[1]]] - print time.time() - start + #print time.time() - start ### Show error surface # @@ -82,20 +82,22 @@ def find(lines, size, l1, l2, bounds, hough, do_something, im_h): # delta_v, delta_h, x_v, y_v, x_h, y_h, size) ### + if show_all: + ### Show grid over lines # - im_t = Image.new('RGB', im_l.size, None) - im_t_l = im_t.load() - im_l_l = im_l.load() - for x in xrange(im_t.size[0]): - for y in xrange(im_t.size[1]): - im_t_l[x, y] = (im_l_l[x, y], 0, 0) - - im_t_d = ImageDraw.Draw(im_t) - for l in grid[0] + grid[1]: - im_t_d.line(l, width=1, fill=(0, 255, 0)) - - do_something(im_t, "lines and grid") + im_t = Image.new('RGB', im_l.size, None) + im_t_l = im_t.load() + im_l_l = im_l.load() + for 
x in xrange(im_t.size[0]): + for y in xrange(im_t.size[1]): + im_t_l[x, y] = (im_l_l[x, y], 0, 0) + + im_t_d = ImageDraw.Draw(im_t) + for l in grid[0] + grid[1]: + im_t_d.line(l, width=1, fill=(0, 255, 0)) + + do_something(im_t, "lines and grid") ### return grid, grid_lines diff --git a/imago.py b/imago.py index 136574a..33e7c69 100755 --- a/imago.py +++ b/imago.py @@ -86,7 +86,7 @@ def main(): lines, l1, l2, bounds, hough, im_h = linef.find_lines(image, show_all, do_something, verbose) grid, lines = gridf.find(lines, image.size, l1, l2, bounds, hough, - do_something, im_h) + show_all, do_something) if show_all: im_g = image.copy() draw = ImageDraw.Draw(im_g) @@ -94,18 +94,7 @@ def main(): draw.line(l, fill=(64, 255, 64), width=1) do_something(im_g, "grid", name="grid") - board, board_raw = intrsc.board(image, lines, show_all, do_something) - - ### Show color distribution - #import matplotlib.pyplot as pyplot - #luma = [(0.30 * s[0] + 0.59 * s[1] + 0.11 * s[2]) / 255. - # for s in sum(board_raw, [])] - #pyplot.scatter(luma, - # [(max(s) - min(s)) / (255 - abs(max(s) + min(s) - 255)) - # for s in sum(board_raw, [])], - # color=[(s[0]/255., s[1]/255., s[2]/255., 1.) 
for s in sum(board_raw, [])]) - #pyplot.show() - ### + board = intrsc.board(image, lines, show_all, do_something) #simple ASCII output: for line in board: diff --git a/intrsc.py b/intrsc.py index ba36b6d..97f9987 100644 --- a/intrsc.py +++ b/intrsc.py @@ -5,6 +5,8 @@ from operator import itemgetter import ImageDraw +import k_means + def dst(line): """Return normalized line.""" if line[0] < pi / 2: @@ -33,15 +35,59 @@ def board(image, lines, show_all, do_something): draw.point((x , y), fill=(120, 255, 120)) do_something(image_g, "intersections") - board_r = [] board_raw = [] for line in intersections: - board_r.append([stone_color(image, intersection) for intersection in - line]) board_raw.append([stone_color_raw(image, intersection) for intersection in line]) - return board_r, board_raw + board_raw = sum(board_raw, []) + + ### Show color distribution + luma = [(0.30 * s[0] + 0.59 * s[1] + 0.11 * s[2]) / 255. + for s in board_raw] + saturation = [(max(s) - min(s)) / (255 - abs(max(s) + min(s) - 255)) + for s in board_raw] + if show_all: + import matplotlib.pyplot as pyplot + pyplot.scatter(luma, saturation, color=[(s[0]/255., s[1]/255., s[2]/255., 1.) 
+ for s in board_raw]) + pyplot.show() + + clusters = k_means.cluster(3, 2,zip(zip(luma, saturation), range(len(luma))), + [[0., 0.], [0.5, 0.25], [1., 0.5]]) + #clusters.sort(key=mean_luma) + + if show_all: + pyplot.scatter([d[0][0] for d in clusters[0]], [d[0][1] for d in clusters[0]], + color=(1,0,0,1)) + pyplot.scatter([d[0][0] for d in clusters[1]], [d[0][1] for d in clusters[1]], + color=(0,1,0,1)) + pyplot.scatter([d[0][0] for d in clusters[2]], [d[0][1] for d in clusters[2]], + color=(0,0,1,1)) + pyplot.show() + + clusters[0] = [(p[1], 'B') for p in clusters[0]] + clusters[1] = [(p[1], '.') for p in clusters[1]] + clusters[2] = [(p[1], 'W') for p in clusters[2]] + + board_rl = sum(clusters, []) + board_rl.sort() + board_rg = (p[1] for p in board_rl) + + board_r = [] + + try: + for i in xrange(19): + board_r.append([]) + for _ in xrange(19): + board_r[i].append(board_rg.next()) + except StopIteration: + pass + + return board_r + +def mean_luma(cluster): + return sum(c[0][0] for c in cluster) / float(len(cluster)) def intersections_from_angl_dist(lines, size, get_all=True): """Take grid-lines and size of the image. Return intersections.""" @@ -60,23 +106,6 @@ def intersections_from_angl_dist(lines, size, get_all=True): intersections.append(line) return intersections -def stone_color(image, (x, y)): - """Given image and coordinates, return stone color.""" - suma = 0. - for i in range(-2, 3): - for j in range(-2, 3): - try: - suma += sum(image.getpixel((x + i, y + j))) - except IndexError: - pass - suma /= 3 * 25 - if suma < 55: - return 'B' - elif suma < 200: - return '.' 
"""K-means module.

Every data point is a pair ``(coordinates, payload)``: ``coordinates`` is an
indexable sequence of numbers and ``payload`` is carried along untouched, so
callers can map clustered points back to where they came from.

A center is either a list of coordinates or ``None``.  ``None`` marks a
"dead" center whose cluster became empty; no point is ever assigned to it
again, so its cluster stays empty for the rest of the run.
"""

import random


def cluster(k, d, data, i_centers=None):
    """Partition *data* into clusters by iterating k-means to a fixed point.

    k         -- number of clusters; only used when *i_centers* is not given
    d         -- number of coordinates per point; only used to draw random
                 initial centers when *i_centers* is not given
    data      -- iterable of ``(coordinates, payload)`` points
    i_centers -- optional list of initial centers (one list of coordinates
                 per cluster)

    Return a list of clusters, each a list of the original points.  The
    i-th cluster belongs to the i-th initial center.
    """
    # Materialize once: the points are walked on every iteration, and a
    # one-shot iterable (e.g. zip() on Python 3) would be empty after the
    # first pass.
    data = list(data)
    if not data:
        # Nothing to cluster; min()/max() below would fail on empty input.
        return [[] for _ in range(len(i_centers) if i_centers else k)]

    if i_centers:
        old_centers = i_centers
    else:
        # Draw each initial center uniformly from the bounding box of data.
        borders = [(min(p[0][i] for p in data), max(p[0][i] for p in data))
                   for i in range(d)]
        old_centers = [[(hi - lo) * random.random() + lo
                        for (lo, hi) in borders]
                       for _ in range(k)]

    clusters, centers = next_step(old_centers, data)
    # Stop as soon as one step leaves every center exactly where it was.
    while delta(old_centers, centers) > 0:
        old_centers = centers
        clusters, centers = next_step(old_centers, data)

    return clusters


def next_step(centers, data):
    """Do one k-means step.

    Assign every point of *data* to its nearest center, then recompute the
    centers.  Return ``(clusters, new_centers)``; the new center of an empty
    cluster is ``None``.
    """
    clusters = [[] for _ in centers]
    for point in data:
        clusters[nearest(centers, point)].append(point)
    return clusters, [centroid(c) for c in clusters]


def nearest(centers, point):
    """Return the index of the center closest to *point*.

    ``None`` centers are skipped.  Ties go to the lowest index.  Squared
    Euclidean distance is compared, which orders the centers the same way
    as the distance itself.

    Raise ValueError when there is no usable center at all.
    """
    best_index = None
    best_dist = None
    for index, center in enumerate(centers):
        if center is None:
            continue
        dist = sum((p - c) ** 2 for (p, c) in zip(point[0], center))
        if best_dist is None or dist < best_dist:
            best_index, best_dist = index, dist
    if best_index is None:
        raise ValueError("no usable center to assign the point to")
    return best_index


def centroid(cluster):
    """Return the coordinate-wise mean of the points in *cluster*.

    Return ``None`` for an empty cluster.
    """
    if not cluster:
        return None
    count = float(len(cluster))
    dimensions = len(cluster[0][0])
    return [sum(p[0][i] for p in cluster) / count for i in range(dimensions)]


def delta(c1, c2):
    """Return the summed absolute coordinate difference of two center lists.

    Centers are compared pairwise by position.  A pair in which either
    center is ``None`` contributes nothing.
    """
    total = 0.
    for (old, new) in zip(c1, c2):
        if old is None or new is None:
            continue
        total += sum(abs(a - b) for (a, b) in zip(old, new))
    return total