代码之家  ›  专栏  ›  技术社区  ›  ASH

尝试动态创建图形

  •  0
  • ASH  · 技术社区  · 5 年前

    我可以手动创建一个带有5个质心的kmeans数据图表(代码如下)。

    # NOTE(review): kmeans/vq and plot/show are not imported in this snippet;
    # presumably scipy.cluster.vq and matplotlib's pylab -- confirm.
    # computing K-Means with K = 5 (5 clusters)
    centroids,_ = kmeans(data,5)
    # assign each sample to a cluster
    idx,_ = vq(data,centroids)
    
    # some plotting using numpy's logical indexing
    # each x/y/format triple below draws the samples of one cluster: idx==k is
    # a boolean mask over the rows of data, columns 0 and 1 are used as x and y,
    # and every cluster gets its own hand-written format string
    plot(data[idx==0,0],data[idx==0,1],'ob',
         data[idx==1,0],data[idx==1,1],'oy',
         data[idx==2,0],data[idx==2,1],'or',
         data[idx==3,0],data[idx==3,1],'og',
         data[idx==4,0],data[idx==4,1],'om')
    # overlay the centroids in a second call, with their own format and a larger marker
    plot(centroids[:,0],centroids[:,1],'sg',markersize=15)
    show()
    

    # Attempted dynamic version: one plot call per cluster index instead of the
    # hand-written list of clusters.
    # NOTE(review): as written this does not run -- the plot( call inside the
    # loop is never closed, 'some_dynamic_color' is a placeholder rather than a
    # real format string, and range(2, 20) yields indices 2..19 although the
    # clustering above was computed with 5 centroids.
    for i in range(2, 20):
       plot(data[idx==[i],0],data[idx==[i],1],'some_dynamic_color'
    plot(centroids[:,0],centroids[:,1],'sg',markersize=15)
    show()
    

    最后,这里是我的数据数组,供参考。我甚至不确定这是否与眼前的问题有关。

    array([[ 0.01160815,  0.28552583],
           [ 0.01495681,  0.24965798],
           [ 0.52218559,  0.26969486],
           [ 0.16408791,  0.30713289],
           [ 0.35037607,  0.28401598],
           [-0.32413957,  0.53144262],
           [ 0.10853278,  0.19756793],
           [ 0.08275109,  0.18140047],
           [-0.04350157,  0.26407197],
           [-0.04789838,  0.31644537],
           [-0.03852801,  0.21557165],
           [ 0.02213885,  0.20033466],
           [-0.80612714,  0.35888803],
           [-0.27971428,  0.3195602 ],
           [ 0.21359135,  0.14144335],
           [ 0.09936109,  0.22313638],
           [ 0.15504834,  0.17022939],
           [ 0.47012351,  0.41452523],
           [ 0.28616062,  0.23098198],
           [ 0.25941178,  0.14843141],
           [ 0.20049158,  0.23769455],
           [-0.19766684,  0.39110416],
           [-0.29619519,  0.53520109],
           [ 0.29319037,  0.23907492],
           [ 0.16644319,  0.18737667],
           [ 0.37407685,  0.22463339],
           [-0.34262982,  0.40264906],
           [ 0.52658291,  0.3542729 ],
           [ 0.5747167 ,  0.50042607],
           [ 0.15607962,  0.20861585],
           [-0.50769188,  0.34266008],
           [ 0.43373588,  0.22526141],
           [ 0.1624051 ,  0.29859298],
           [ 0.22789948,  0.20157262],
           [-0.1179015 ,  0.21471169],
           [ 0.26108742,  0.26604149],
           [ 0.10019146,  0.25547835],
           [ 0.18906467,  0.19078555],
           [-0.02575308,  0.2877592 ],
           [-0.45292564,  0.51866493],
           [ 0.11516754,  0.21504329],
           [ 0.10020043,  0.23943587],
           [ 0.21402611,  0.34297039],
           [ 0.24574342,  0.15734118],
           [ 0.58083355,  0.22886509],
           [ 0.33975699,  0.33309233],
           [ 0.19002609,  0.14372212],
           [ 0.35220577,  0.23879166],
           [ 0.27427999,  0.1529184 ],
           [ 0.06261825,  0.18908223],
           [ 0.25005859,  0.21363957],
           [ 0.1676683 ,  0.26111871],
           [ 0.14703364,  0.25532777],
           [ 0.26130579,  0.14012819],
           [-0.14897454,  0.23037735],
           [-0.26827493,  0.23193457],
           [ 0.51701526,  0.17887009],
           [-0.05870745,  0.18040883],
           [ 0.25651599,  0.227289  ],
           [ 0.06881783,  0.28114007],
           [ 0.43079653,  0.21510341]])
    

    谢谢。

    0 回复  |  直到 5 年前
        1
  •  2
  •   shimo    5 年前

    for循环的索引i应该在0到4之间(有5个质心)。

    # One plot call per cluster. Looping over len(centroids) instead of a
    # literal 5 keeps the loop correct for any K (here it runs i = 0..4).
    for i in range(len(centroids)):
        members = idx == i  # boolean mask: samples assigned to cluster i
        # 'C0'..'C9' name the entries of matplotlib's default colour cycle, so
        # the colour is derived from the cluster index rather than typed by hand
        # (colours repeat after ten clusters).
        plot(data[members, 0], data[members, 1], 'o', color='C%d' % (i % 10))
    # centroids drawn last so they sit on top of the samples
    plot(centroids[:, 0], centroids[:, 1], 'sg', markersize=15)
    show()
    

    import matplotlib.pyplot as plt
    from matplotlib import cm
    import numpy as np
    from sklearn.cluster import KMeans
    
    # Number of clusters; the model, the colormap and the colour scaling below
    # all derive from this single value.
    n_clusters = 5

    # Placeholder: substitute your own samples, one (x, y) row per point.
    # (The closing parenthesis must stay outside the comment.)
    data = np.array([...])  # your data

    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(data)
    y_kmeans = kmeans.predict(data)  # cluster label (0 .. n_clusters-1) per sample

    # plt.get_cmap replaces cm.get_cmap, which was deprecated in Matplotlib 3.7
    # and removed in 3.9; the second argument resamples viridis to one discrete
    # colour per cluster.
    viridis = plt.get_cmap('viridis', n_clusters)
    # A single vectorised scatter call colours every sample by its label;
    # vmin/vmax pin the label-to-colour mapping even if a cluster ends up empty.
    plt.scatter(data[:, 0], data[:, 1], c=y_kmeans, cmap=viridis,
                vmin=0, vmax=n_clusters - 1, s=50)
    centers = kmeans.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
    plt.show()
    

    (此处原为一张图片,应为上述代码生成的聚类散点图。)

    参考资料:https://jakevdp.github.io/PythonDataScienceHandbook/05.11-k-means.html