SOM kmean оптимизация ValueError: все входные массивы должны иметь одинаковое количество измерений

Я пытаюсь объединить kmeans в SOM, чтобы найти лучший подходящий юнит. Во время кластеризации точек для возврата номеров кластеров для каждой точки я сталкиваюсь с этой ошибкой

"ValueError: все входные массивы должны иметь одинаковое количество измерений" в строке 159

distances_from_center = np.concatenate((distances_from_center, [dist(teacher,nodes)]))

Я пытаюсь оптимизировать SOM, используя подход быстрого kmeans.

N = 8 # linear size of 2D map
M = 8
n_teacher = 10000 # # of teacher signal

np.random.seed(100)# test seed for random number



def main():

    # initialize node vectors

    nodes = np.random.rand(N,M,3)# node array. each node has 3-dim weight vector
    #nodes = centers_initiation(n_teacher, 4)
    #initial out put

    #TODO; make out put function to simplify here 

    plt.imshow(nodes, interpolation='none')

    plt.savefig("init.png")

    """"""

    """ Learning """

    """"""

    # teacher signal

    teachers = np.random.rand(n_teacher,3)

    for i in range(n_teacher):

        train(nodes, teachers, i)

        # intermediate out put

        if i%200 ==0 or i< 100: #out put for i<100 or each 1000 iteration

            plt.imshow(nodes, interpolation='none')

            plt.savefig(str(i)+".png")

    #output

    plt.imshow(nodes, interpolation='none')

    plt.savefig("final.png")



def train(nodes, teachers, i):

    bmu = best_matching_unit(nodes, teachers[i])

    #print bmu

    for x in range(N):

        for y in range(M):

            c = np.array([x,y])# coordinate of unit

            d = np.linalg.norm(c-bmu)

            L = learning_ratio(i)

            S = learning_radius(i,d)

            for z in range(3): #TODO clear up using numpy function

                nodes[x,y,z] += L*S*(teachers[i,z] - nodes[x,y,z])




def dist(x, y):

    # euclidean distance

    if len(x.shape) == 1:

        d = np.sqrt(np.sum((x - y) ** 2))

    else:

        d = np.sqrt(np.sum((x - y) ** 2, axis=1))

    return d


def centers_initiation(teacher, number_of_centers):

    # initialization of clusters centers as most distant points. return cluster centers (point)

    dist_per_point = np.empty((0, 0), int)

    dist_for_point = 0

    index_of_deleted_point = 0

    for point in teacher:


        for other_point in np.delete(teacher, index_of_deleted_point, axis=0):

            dist_for_point += dist(point, other_point)

        dist_per_point = np.append(dist_per_point, dist_for_point)

        dist_for_point = 0

        index_of_deleted_point += 1

    ordered_points_by_min = np.array(

            [key for key, value in sorted(enumerate(dist_per_point), key=lambda p: p[1], reverse=True)])

    return teacher[ordered_points_by_min[0:number_of_centers]]


def get_cluster_number(teacher, nodes):

    # clustering points. return numbers of clusters for each point

    distances_from_centers = np.zeros((0, nodes.shape[0]), int)

    for point in teacher:

        distances_from_center = np.array([])

        for center in nodes:

            distances_from_center = np.concatenate((distances_from_center, [dist(teacher,nodes)]))

        distances_from_centers = np.concatenate((distances_from_centers, [distances_from_center]), axis=0)

    nearest_center_number = np.argmin(distances_from_centers, axis=1)

    return nearest_center_number



def best_matching_unit(teacher, nodes):

    clusters = get_cluster_number(teacher, nodes)

    clusters_centers_shift = 1

    new_centers = np.zeros(nodes.shape)

    counter = 0

    while np.sum(clusters_centers_shift) != 0:

        counter += 1

        for i in xrange(nodes.shape[0]):

            new_centers[i] = np.mean(teacher[:][clusters == i], axis=0)

        clusters_centers_shift = dist(new_centers, nodes)

        clusters = get_cluster_number(teacher, new_centers)

        nodes = np.copy(new_centers)

    return clusters





def neighbourhood(t):#neighbourhood radious

    halflife = float(n_teacher/4) #for testing

    initial  = float(N/2)

    return initial*np.exp(-t/halflife)



def learning_ratio(t):

    halflife = float(n_teacher/4) #for testing

    initial  = 0.1

    return initial*np.exp(-t/halflife)



def learning_radius(t, d):

    # d is distance from BMU

    s = neighbourhood(t)

    return np.exp(-d**2/(2*s**2))



main()

0 ответов

Другие вопросы по тегам